VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@102829

Last change on this file since 102829 was 102801, checked in by vboxsync, 16 months ago

VMM/IEM: Stats, go straight for the safe fallback functions in the memory access helpers. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 102801 2024-01-09 22:53:03Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * TLB Lookup config.
107 */
108#if (defined(RT_ARCH_AMD64) && 1) || (defined(RT_ARCH_ARM64) && 1)
109# define IEMNATIVE_WITH_TLB_LOOKUP
110#endif
111#ifdef IEMNATIVE_WITH_TLB_LOOKUP
112# define IEMNATIVE_WITH_TLB_LOOKUP_FETCH
113#endif
114#ifdef IEMNATIVE_WITH_TLB_LOOKUP
115# define IEMNATIVE_WITH_TLB_LOOKUP_STORE
116#endif
117#ifdef IEMNATIVE_WITH_TLB_LOOKUP
118# define IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
119#endif
120#ifdef IEMNATIVE_WITH_TLB_LOOKUP
121# define IEMNATIVE_WITH_TLB_LOOKUP_PUSH
122#endif
123#ifdef IEMNATIVE_WITH_TLB_LOOKUP
124# define IEMNATIVE_WITH_TLB_LOOKUP_POP
125#endif
126
127
128/*
129 * Narrow down configs here to avoid wasting time on unused configs.
130 * Note! Same checks in IEMAllThrdRecompiler.cpp.
131 */
132
133#ifndef IEM_WITH_CODE_TLB
134# error The code TLB must be enabled for the recompiler.
135#endif
136
137#ifndef IEM_WITH_DATA_TLB
138# error The data TLB must be enabled for the recompiler.
139#endif
140
141#ifndef IEM_WITH_SETJMP
142# error The setjmp approach must be enabled for the recompiler.
143#endif
144
145/** @todo eliminate this clang build hack. */
146#if RT_CLANG_PREREQ(4, 0)
147# pragma GCC diagnostic ignored "-Wunused-function"
148#endif
149
150
151/*********************************************************************************************************************************
152* Internal Functions *
153*********************************************************************************************************************************/
154#ifdef VBOX_STRICT
155static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
156 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
157static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
158#endif
159#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
160static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
161static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
162#endif
163DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
164DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
165 IEMNATIVEGSTREG enmGstReg, uint32_t off);
166DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
167
168
169/*********************************************************************************************************************************
170* Executable Memory Allocator *
171*********************************************************************************************************************************/
172/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
173 * Use an alternative chunk sub-allocator that does store internal data
174 * in the chunk.
175 *
176 * Using the RTHeapSimple is not practical on newer darwin systems where
177 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
178 * memory. We would have to change the protection of the whole chunk for
179 * every call to RTHeapSimple, which would be rather expensive.
180 *
181 * This alternative implementation lets us restrict page protection modifications
182 * to the pages backing the executable memory we just allocated.
183 */
184#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
185/** The chunk sub-allocation unit size in bytes. */
186#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
187/** The chunk sub-allocation unit size as a shift factor. */
188#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
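/* Illustrative sketch (not compiled in): how a request size maps onto the
 * sub-allocation units defined above.  This mirrors the rounding done by the
 * chunk allocator further down; the helper name is made up for the example.
 *
 * @code
 *  static uint32_t iemExecMemExampleReqToUnits(uint32_t cbReq)
 *  {
 *      // 1..128 bytes -> 1 unit, 129..256 bytes -> 2 units, and so on.
 *      return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
 *  }
 * @endcode
 */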
189
190#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
191# ifdef IEMNATIVE_USE_GDB_JIT
192# define IEMNATIVE_USE_GDB_JIT_ET_DYN
193
194/** GDB JIT: Code entry. */
195typedef struct GDBJITCODEENTRY
196{
197 struct GDBJITCODEENTRY *pNext;
198 struct GDBJITCODEENTRY *pPrev;
199 uint8_t *pbSymFile;
200 uint64_t cbSymFile;
201} GDBJITCODEENTRY;
202
203/** GDB JIT: Actions. */
204typedef enum GDBJITACTIONS : uint32_t
205{
206 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
207} GDBJITACTIONS;
208
209/** GDB JIT: Descriptor. */
210typedef struct GDBJITDESCRIPTOR
211{
212 uint32_t uVersion;
213 GDBJITACTIONS enmAction;
214 GDBJITCODEENTRY *pRelevant;
215 GDBJITCODEENTRY *pHead;
216 /** Our addition: */
217 GDBJITCODEENTRY *pTail;
218} GDBJITDESCRIPTOR;
219
220/** GDB JIT: Our simple symbol file data. */
221typedef struct GDBJITSYMFILE
222{
223 Elf64_Ehdr EHdr;
224# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
225 Elf64_Shdr aShdrs[5];
226# else
227 Elf64_Shdr aShdrs[7];
228 Elf64_Phdr aPhdrs[2];
229# endif
230 /** The dwarf ehframe data for the chunk. */
231 uint8_t abEhFrame[512];
232 char szzStrTab[128];
233 Elf64_Sym aSymbols[3];
234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
235 Elf64_Sym aDynSyms[2];
236 Elf64_Dyn aDyn[6];
237# endif
238} GDBJITSYMFILE;
239
240extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
241extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
242
243/** Init once for g_IemNativeGdbJitLock. */
244static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
245/** Init once for the critical section. */
246static RTCRITSECT g_IemNativeGdbJitLock;
247
248/** GDB reads the info here. */
249GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
250
251/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
252DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
253{
254 ASMNopPause();
255}
256
257/** @callback_method_impl{FNRTONCE} */
258static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
259{
260 RT_NOREF(pvUser);
261 return RTCritSectInit(&g_IemNativeGdbJitLock);
262}
263
264
265# endif /* IEMNATIVE_USE_GDB_JIT */
266
267/**
268 * Per-chunk unwind info for non-windows hosts.
269 */
270typedef struct IEMEXECMEMCHUNKEHFRAME
271{
272# ifdef IEMNATIVE_USE_LIBUNWIND
273 /** The offset of the FDA into abEhFrame. */
274 uintptr_t offFda;
275# else
276 /** 'struct object' storage area. */
277 uint8_t abObject[1024];
278# endif
279# ifdef IEMNATIVE_USE_GDB_JIT
280# if 0
281 /** The GDB JIT 'symbol file' data. */
282 GDBJITSYMFILE GdbJitSymFile;
283# endif
284 /** The GDB JIT list entry. */
285 GDBJITCODEENTRY GdbJitEntry;
286# endif
287 /** The dwarf ehframe data for the chunk. */
288 uint8_t abEhFrame[512];
289} IEMEXECMEMCHUNKEHFRAME;
290/** Pointer to per-chunk unwind info for non-windows hosts. */
291typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
292#endif
293
294
295/**
296 * A chunk of executable memory.
297 */
298typedef struct IEMEXECMEMCHUNK
299{
300#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
301 /** Number of free items in this chunk. */
302 uint32_t cFreeUnits;
303 /** Hint where to start searching for free space in the allocation bitmap. */
304 uint32_t idxFreeHint;
305#else
306 /** The heap handle. */
307 RTHEAPSIMPLE hHeap;
308#endif
309 /** Pointer to the chunk. */
310 void *pvChunk;
311#ifdef IN_RING3
312 /**
313 * Pointer to the unwind information.
314 *
315 * This is used during C++ throw and longjmp (windows and probably most other
316 * platforms). Some debuggers (windbg) make use of it as well.
317 *
318 * Windows: This is allocated from hHeap on windows because (at least for
319 * AMD64) the UNWIND_INFO structure address in the
320 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
321 *
322 * Others: Allocated from the regular heap to avoid unnecessary executable data
323 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
324 void *pvUnwindInfo;
325#elif defined(IN_RING0)
326 /** Allocation handle. */
327 RTR0MEMOBJ hMemObj;
328#endif
329} IEMEXECMEMCHUNK;
330/** Pointer to a memory chunk. */
331typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
332
333
334/**
335 * Executable memory allocator for the native recompiler.
336 */
337typedef struct IEMEXECMEMALLOCATOR
338{
339 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
340 uint32_t uMagic;
341
342 /** The chunk size. */
343 uint32_t cbChunk;
344 /** The maximum number of chunks. */
345 uint32_t cMaxChunks;
346 /** The current number of chunks. */
347 uint32_t cChunks;
348 /** Hint where to start looking for available memory. */
349 uint32_t idxChunkHint;
350 /** Statistics: Current number of allocations. */
351 uint32_t cAllocations;
352
353 /** The total amount of memory available. */
354 uint64_t cbTotal;
355 /** Total amount of free memory. */
356 uint64_t cbFree;
357 /** Total amount of memory allocated. */
358 uint64_t cbAllocated;
359
360#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
361 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
362 *
363 * Since the chunk size is a power of two and the minimum chunk size is a lot
364 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
365 * require a whole number of uint64_t elements in the allocation bitmap. So,
366 * for the sake of simplicity/laziness, they are allocated as one continuous
367 * chunk. */
368 uint64_t *pbmAlloc;
369 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
370 uint32_t cUnitsPerChunk;
371 /** Number of bitmap elements per chunk (for quickly locating the bitmap
372 * portion corresponding to an chunk). */
373 uint32_t cBitmapElementsPerChunk;
374#else
375 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
376 * @{ */
377 /** The size of the heap internal block header. This is used to adjust the
378 * requested memory size to make sure there is exactly enough room for a header at
379 * the end of the blocks we allocate before the next 64 byte alignment line. */
380 uint32_t cbHeapBlockHdr;
381 /** The size of the initial heap allocation required to make sure the first
382 * allocation is correctly aligned. */
383 uint32_t cbHeapAlignTweak;
384 /** The alignment tweak allocation address. */
385 void *pvAlignTweak;
386 /** @} */
387#endif
388
389#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
390 /** Pointer to the array of unwind info running parallel to aChunks (same
391 * allocation as this structure, located after the bitmaps).
392 * (For Windows, the structures must reside in 32-bit RVA distance to the
393 * actual chunk, so they are allocated off the chunk.) */
394 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
395#endif
396
397 /** The allocation chunks. */
398 RT_FLEXIBLE_ARRAY_EXTENSION
399 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
400} IEMEXECMEMALLOCATOR;
401/** Pointer to an executable memory allocator. */
402typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
403
404/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
405#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
406
407
408static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
409
410
411/**
412 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
413 * the heap statistics.
414 */
415static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
416 uint32_t cbReq, uint32_t idxChunk)
417{
418 pExecMemAllocator->cAllocations += 1;
419 pExecMemAllocator->cbAllocated += cbReq;
420#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
421 pExecMemAllocator->cbFree -= cbReq;
422#else
423 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
424#endif
425 pExecMemAllocator->idxChunkHint = idxChunk;
426
427#ifdef RT_OS_DARWIN
428 /*
429 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
430 * on darwin. So, we mark the pages returned as read+write after alloc and
431 * expect the caller to call iemExecMemAllocatorReadyForUse when done
432 * writing to the allocation.
433 *
434 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
435 * for details.
436 */
437 /** @todo detect if this is necessary... it wasn't required on 10.15 or
438 * whatever older version it was. */
439 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
440 AssertRC(rc);
441#endif
442
443 return pvRet;
444}
445
446
447#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
448static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
449 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
450{
451 /*
452 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
453 */
454 Assert(!(cToScan & 63));
455 Assert(!(idxFirst & 63));
456 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
457 pbmAlloc += idxFirst / 64;
458
459 /*
460 * Scan the bitmap for cReqUnits consecutive clear bits
461 */
462 /** @todo This can probably be done more efficiently for non-x86 systems. */
463 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
464 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
465 {
466 uint32_t idxAddBit = 1;
467 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
468 idxAddBit++;
469 if (idxAddBit >= cReqUnits)
470 {
471 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
472
473 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
474 pChunk->cFreeUnits -= cReqUnits;
475 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
476
477 void * const pvRet = (uint8_t *)pChunk->pvChunk
478 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
479
480 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
481 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
482 }
483
484 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
485 }
486 return NULL;
487}
488#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
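/* Illustrative sketch (not compiled in): the first-fit scan performed by
 * iemExecMemAllocatorAllocInChunkInt above, written as a plain bit loop.  The
 * real code uses ASMBitFirstClear/ASMBitNextClear to skip ahead quickly; this
 * simplified helper only exists to make the search easier to follow.
 *
 * @code
 *  static int32_t iemExecMemExampleFindClearRun(uint64_t const *pbmAlloc, uint32_t cBits, uint32_t cReqUnits)
 *  {
 *      uint32_t cRun = 0;
 *      for (uint32_t iBit = 0; iBit < cBits; iBit++)
 *          if (!(pbmAlloc[iBit / 64] & RT_BIT_64(iBit % 64)))
 *          {
 *              if (++cRun >= cReqUnits)
 *                  return (int32_t)(iBit + 1 - cReqUnits); // start index of the free run
 *          }
 *          else
 *              cRun = 0;
 *      return -1;                                          // no run of cReqUnits clear bits found
 *  }
 * @endcode
 */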
489
490
491static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
492{
493#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
494 /*
495 * Figure out how much to allocate.
496 */
497 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
498 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
499 {
500 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
501 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
502 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
503 {
504 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
505 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
506 if (pvRet)
507 return pvRet;
508 }
509 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
510 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
511 cReqUnits, idxChunk);
512 }
513#else
514 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
515 if (pvRet)
516 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
517#endif
518 return NULL;
519
520}
521
522
523/**
524 * Allocates @a cbReq bytes of executable memory.
525 *
526 * @returns Pointer to the memory, NULL if out of memory or other problem
527 * encountered.
528 * @param pVCpu The cross context virtual CPU structure of the calling
529 * thread.
530 * @param cbReq How many bytes are required.
531 */
532static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
533{
534 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
535 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
536 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
537
538
539 for (unsigned iIteration = 0;; iIteration++)
540 {
541 /*
542 * Adjust the request size so it'll fit the allocator alignment/whatnot.
543 *
544 * For the RTHeapSimple allocator this means to follow the logic described
545 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
546 * existing chunks if we think we've got sufficient free memory around.
547 *
548 * While for the alternative one we just align it up to a whole unit size.
549 */
550#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
551 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
552#else
553 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
554#endif
555 if (cbReq <= pExecMemAllocator->cbFree)
556 {
557 uint32_t const cChunks = pExecMemAllocator->cChunks;
558 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
559 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
560 {
561 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
562 if (pvRet)
563 return pvRet;
564 }
565 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
566 {
567 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
568 if (pvRet)
569 return pvRet;
570 }
571 }
572
573 /*
574 * Can we grow it with another chunk?
575 */
576 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
577 {
578 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
579 AssertLogRelRCReturn(rc, NULL);
580
581 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
582 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
583 if (pvRet)
584 return pvRet;
585 AssertFailed();
586 }
587
588 /*
589 * Try pruning native TBs once.
590 */
591 if (iIteration == 0)
592 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
593 else
594 {
595 /** @todo stats... */
596 return NULL;
597 }
598 }
599
600}
601
602
603/** This is a hook that we may need later for changing memory protection back
604 * to readonly+exec */
605static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
606{
607#ifdef RT_OS_DARWIN
608 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
609 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
610 AssertRC(rc); RT_NOREF(pVCpu);
611
612 /*
613 * Flush the instruction cache:
614 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
615 */
616 /* sys_dcache_flush(pv, cb); - not necessary */
617 sys_icache_invalidate(pv, cb);
618#else
619 RT_NOREF(pVCpu, pv, cb);
620#endif
621}
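/*
 * Illustrative usage sketch (not compiled in): on darwin the allocation comes
 * back read+write, the caller emits the native code into it and only then
 * flips it to read+exec (and flushes the icache) via the helper above.  The
 * cbNeeded variable below is just a stand-in for whatever size the caller
 * computed.
 *
 * @code
 *  void * const pvCode = iemExecMemAllocatorAlloc(pVCpu, cbNeeded);
 *  if (pvCode)
 *  {
 *      // ... emit native instructions into pvCode while it is still writable ...
 *      iemExecMemAllocatorReadyForUse(pVCpu, pvCode, cbNeeded); // now RX + icache flushed
 *  }
 * @endcode
 */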
622
623
624/**
625 * Frees executable memory.
626 */
627void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
628{
629 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
630 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
631 Assert(pv);
632#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
633 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
634#else
635 Assert(!((uintptr_t)pv & 63));
636#endif
637
638 /* Align the size as we did when allocating the block. */
639#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
640 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
641#else
642 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
643#endif
644
645 /* Free it / assert sanity. */
646#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
647 uint32_t const cChunks = pExecMemAllocator->cChunks;
648 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
649 bool fFound = false;
650 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
651 {
652 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
653 fFound = offChunk < cbChunk;
654 if (fFound)
655 {
656#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
657 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
658 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
659
660 /* Check that it's valid and free it. */
661 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
662 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
663 for (uint32_t i = 1; i < cReqUnits; i++)
664 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
665 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
666
667 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
668 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
669
670 /* Update the stats. */
671 pExecMemAllocator->cbAllocated -= cb;
672 pExecMemAllocator->cbFree += cb;
673 pExecMemAllocator->cAllocations -= 1;
674 return;
675#else
676 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
677 break;
678#endif
679 }
680 }
681# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
682 AssertFailed();
683# else
684 Assert(fFound);
685# endif
686#endif
687
688#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
689 /* Update stats while cb is freshly calculated.*/
690 pExecMemAllocator->cbAllocated -= cb;
691 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
692 pExecMemAllocator->cAllocations -= 1;
693
694 /* Free it. */
695 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
696#endif
697}
698
699
700
701#ifdef IN_RING3
702# ifdef RT_OS_WINDOWS
703
704/**
705 * Initializes the unwind info structures for windows hosts.
706 */
707static int
708iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
709 void *pvChunk, uint32_t idxChunk)
710{
711 RT_NOREF(pVCpu);
712
713 /*
714 * The AMD64 unwind opcodes.
715 *
716 * This is a program that starts with RSP after a RET instruction that
717 * ends up in recompiled code, and the operations we describe here will
718 * restore all non-volatile registers and bring RSP back to where our
719 * RET address is. This means it's reverse order from what happens in
720 * the prologue.
721 *
722 * Note! Using a frame register approach here both because we have one
723 * and mainly because the UWOP_ALLOC_LARGE argument values
724 * would be a pain to write initializers for. On the positive
725 * side, we're impervious to changes in the stack variable
726 * area and can deal with dynamic stack allocations if necessary.
727 */
728 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
729 {
730 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
731 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
732 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
733 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
734 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
735 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
736 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
737 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
738 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
739 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
740 };
741 union
742 {
743 IMAGE_UNWIND_INFO Info;
744 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
745 } s_UnwindInfo =
746 {
747 {
748 /* .Version = */ 1,
749 /* .Flags = */ 0,
750 /* .SizeOfProlog = */ 16, /* whatever */
751 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
752 /* .FrameRegister = */ X86_GREG_xBP,
753 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
754 }
755 };
756 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
757 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
758
759 /*
760 * Calc how much space we need and allocate it off the exec heap.
761 */
762 unsigned const cFunctionEntries = 1;
763 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
764 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
765# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
766 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
767 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
768 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
769# else
770 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
771 - pExecMemAllocator->cbHeapBlockHdr;
772 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
773 32 /*cbAlignment*/);
774# endif
775 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
776 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
777
778 /*
779 * Initialize the structures.
780 */
781 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
782
783 paFunctions[0].BeginAddress = 0;
784 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
785 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
786
787 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
788 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
789
790 /*
791 * Register it.
792 */
793 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
794 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
795
796 return VINF_SUCCESS;
797}
798
799
800# else /* !RT_OS_WINDOWS */
801
802/**
803 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
804 */
805DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
806{
807 if (iValue >= 64)
808 {
809 Assert(iValue < 0x2000);
810 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
811 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
812 }
813 else if (iValue >= 0)
814 *Ptr.pb++ = (uint8_t)iValue;
815 else if (iValue > -64)
816 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
817 else
818 {
819 Assert(iValue > -0x2000);
820 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
821 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
822 }
823 return Ptr;
824}
825
826
827/**
828 * Emits an ULEB128 encoded value (up to 64-bit wide).
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
831{
832 while (uValue >= 0x80)
833 {
834 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
835 uValue >>= 7;
836 }
837 *Ptr.pb++ = (uint8_t)uValue;
838 return Ptr;
839}
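/* Worked examples (informational only) for the two LEB128 emitters above:
 *  - iemDwarfPutUleb128(Ptr, 300) emits 0xac 0x02 (300 = 0x2c + 2 * 128).
 *  - iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78, which is exactly
 *    the data alignment factor written into the CIE below.
 */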
840
841
842/**
843 * Emits a CFA rule as register @a uReg + offset @a off.
844 */
845DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
846{
847 *Ptr.pb++ = DW_CFA_def_cfa;
848 Ptr = iemDwarfPutUleb128(Ptr, uReg);
849 Ptr = iemDwarfPutUleb128(Ptr, off);
850 return Ptr;
851}
852
853
854/**
855 * Emits a register (@a uReg) save location:
856 * CFA + @a off * data_alignment_factor
857 */
858DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
859{
860 if (uReg < 0x40)
861 *Ptr.pb++ = DW_CFA_offset | uReg;
862 else
863 {
864 *Ptr.pb++ = DW_CFA_offset_extended;
865 Ptr = iemDwarfPutUleb128(Ptr, uReg);
866 }
867 Ptr = iemDwarfPutUleb128(Ptr, off);
868 return Ptr;
869}
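/* Worked example (informational only, assuming the standard SysV AMD64 DWARF
 * numbering where RBP is register 6): iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2)
 * emits DW_CFA_offset | 6 followed by ULEB128(2), i.e. 0x86 0x02, telling the
 * unwinder that RBP was saved at CFA + 2 * data_alignment_factor = CFA - 16.
 */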
870
871
872# if 0 /* unused */
873/**
874 * Emits a register (@a uReg) save location, using signed offset:
875 * CFA + @a offSigned * data_alignment_factor
876 */
877DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
878{
879 *Ptr.pb++ = DW_CFA_offset_extended_sf;
880 Ptr = iemDwarfPutUleb128(Ptr, uReg);
881 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
882 return Ptr;
883}
884# endif
885
886
887/**
888 * Initializes the unwind info section for non-windows hosts.
889 */
890static int
891iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
892 void *pvChunk, uint32_t idxChunk)
893{
894 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
895 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
896
897 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
898
899 /*
900 * Generate the CIE first.
901 */
902# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
903 uint8_t const iDwarfVer = 3;
904# else
905 uint8_t const iDwarfVer = 4;
906# endif
907 RTPTRUNION const PtrCie = Ptr;
908 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
909 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
910 *Ptr.pb++ = iDwarfVer; /* DWARF version */
911 *Ptr.pb++ = 0; /* Augmentation. */
912 if (iDwarfVer >= 4)
913 {
914 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
915 *Ptr.pb++ = 0; /* Segment selector size. */
916 }
917# ifdef RT_ARCH_AMD64
918 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
919# else
920 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
921# endif
922 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
923# ifdef RT_ARCH_AMD64
924 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
925# elif defined(RT_ARCH_ARM64)
926 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
927# else
928# error "port me"
929# endif
930 /* Initial instructions: */
931# ifdef RT_ARCH_AMD64
932 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
936 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
937 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
938 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
939 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
940# elif defined(RT_ARCH_ARM64)
941# if 1
942 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
943# else
944 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
945# endif
946 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
947 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
948 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
949 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
950 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
951 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
952 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
953 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
954 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
955 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
956 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
957 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
958 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
959 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
960# else
961# error "port me"
962# endif
963 while ((Ptr.u - PtrCie.u) & 3)
964 *Ptr.pb++ = DW_CFA_nop;
965 /* Finalize the CIE size. */
966 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
967
968 /*
969 * Generate an FDE for the whole chunk area.
970 */
971# ifdef IEMNATIVE_USE_LIBUNWIND
972 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
973# endif
974 RTPTRUNION const PtrFde = Ptr;
975 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
976 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
977 Ptr.pu32++;
978 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
979 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
980# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
981 *Ptr.pb++ = DW_CFA_nop;
982# endif
983 while ((Ptr.u - PtrFde.u) & 3)
984 *Ptr.pb++ = DW_CFA_nop;
985 /* Finalize the FDE size. */
986 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
987
988 /* Terminator entry. */
989 *Ptr.pu32++ = 0;
990 *Ptr.pu32++ = 0; /* just to be sure... */
991 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
992
993 /*
994 * Register it.
995 */
996# ifdef IEMNATIVE_USE_LIBUNWIND
997 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
998# else
999 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1000 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1001# endif
1002
1003# ifdef IEMNATIVE_USE_GDB_JIT
1004 /*
1005 * Now for telling GDB about this (experimental).
1006 *
1007 * This seems to work best with ET_DYN.
1008 */
1009 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
1010# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1011 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
1012 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
1013# else
1014 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
1015 - pExecMemAllocator->cbHeapBlockHdr;
1016 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
1017# endif
1018 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1019 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1020
1021 RT_ZERO(*pSymFile);
1022
1023 /*
1024 * The ELF header:
1025 */
1026 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1027 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1028 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1029 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1030 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1031 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1032 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1033 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1034# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1035 pSymFile->EHdr.e_type = ET_DYN;
1036# else
1037 pSymFile->EHdr.e_type = ET_REL;
1038# endif
1039# ifdef RT_ARCH_AMD64
1040 pSymFile->EHdr.e_machine = EM_AMD64;
1041# elif defined(RT_ARCH_ARM64)
1042 pSymFile->EHdr.e_machine = EM_AARCH64;
1043# else
1044# error "port me"
1045# endif
1046 pSymFile->EHdr.e_version = 1; /*?*/
1047 pSymFile->EHdr.e_entry = 0;
1048# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1049 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1050# else
1051 pSymFile->EHdr.e_phoff = 0;
1052# endif
1053 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1054 pSymFile->EHdr.e_flags = 0;
1055 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1056# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1057 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1058 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1059# else
1060 pSymFile->EHdr.e_phentsize = 0;
1061 pSymFile->EHdr.e_phnum = 0;
1062# endif
1063 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1064 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1065 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1066
1067 uint32_t offStrTab = 0;
1068#define APPEND_STR(a_szStr) do { \
1069 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1070 offStrTab += sizeof(a_szStr); \
1071 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1072 } while (0)
1073#define APPEND_STR_FMT(a_szStr, ...) do { \
1074 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1075 offStrTab++; \
1076 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1077 } while (0)
1078
1079 /*
1080 * Section headers.
1081 */
1082 /* Section header #0: NULL */
1083 unsigned i = 0;
1084 APPEND_STR("");
1085 RT_ZERO(pSymFile->aShdrs[i]);
1086 i++;
1087
1088 /* Section header: .eh_frame */
1089 pSymFile->aShdrs[i].sh_name = offStrTab;
1090 APPEND_STR(".eh_frame");
1091 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1092 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1093# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1094 pSymFile->aShdrs[i].sh_offset
1095 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1096# else
1097 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1098 pSymFile->aShdrs[i].sh_offset = 0;
1099# endif
1100
1101 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1107 i++;
1108
1109 /* Section header: .shstrtab */
1110 unsigned const iShStrTab = i;
1111 pSymFile->EHdr.e_shstrndx = iShStrTab;
1112 pSymFile->aShdrs[i].sh_name = offStrTab;
1113 APPEND_STR(".shstrtab");
1114 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1115 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1119# else
1120 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1121 pSymFile->aShdrs[i].sh_offset = 0;
1122# endif
1123 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1124 pSymFile->aShdrs[i].sh_link = 0;
1125 pSymFile->aShdrs[i].sh_info = 0;
1126 pSymFile->aShdrs[i].sh_addralign = 1;
1127 pSymFile->aShdrs[i].sh_entsize = 0;
1128 i++;
1129
1130 /* Section header: .symtab */
1131 pSymFile->aShdrs[i].sh_name = offStrTab;
1132 APPEND_STR(".symtab");
1133 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1134 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1135 pSymFile->aShdrs[i].sh_offset
1136 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1137 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1138 pSymFile->aShdrs[i].sh_link = iShStrTab;
1139 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1140 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1141 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1142 i++;
1143
1144# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1145 /* Section header: .dynsym */
1146 pSymFile->aShdrs[i].sh_name = offStrTab;
1147 APPEND_STR(".dynsym");
1148 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1149 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1150 pSymFile->aShdrs[i].sh_offset
1151 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1152 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1153 pSymFile->aShdrs[i].sh_link = iShStrTab;
1154 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1155 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1156 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1157 i++;
1158# endif
1159
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1161 /* Section header: .dynamic */
1162 pSymFile->aShdrs[i].sh_name = offStrTab;
1163 APPEND_STR(".dynamic");
1164 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1165 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1166 pSymFile->aShdrs[i].sh_offset
1167 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1168 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1169 pSymFile->aShdrs[i].sh_link = iShStrTab;
1170 pSymFile->aShdrs[i].sh_info = 0;
1171 pSymFile->aShdrs[i].sh_addralign = 1;
1172 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1173 i++;
1174# endif
1175
1176 /* Section header: .text */
1177 unsigned const iShText = i;
1178 pSymFile->aShdrs[i].sh_name = offStrTab;
1179 APPEND_STR(".text");
1180 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1181 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1182# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1183 pSymFile->aShdrs[i].sh_offset
1184 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1185# else
1186 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1187 pSymFile->aShdrs[i].sh_offset = 0;
1188# endif
1189 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1190 pSymFile->aShdrs[i].sh_link = 0;
1191 pSymFile->aShdrs[i].sh_info = 0;
1192 pSymFile->aShdrs[i].sh_addralign = 1;
1193 pSymFile->aShdrs[i].sh_entsize = 0;
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1197
1198# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1199 /*
1200 * The program headers:
1201 */
1202 /* Everything in a single LOAD segment: */
1203 i = 0;
1204 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1205 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1206 pSymFile->aPhdrs[i].p_offset
1207 = pSymFile->aPhdrs[i].p_vaddr
1208 = pSymFile->aPhdrs[i].p_paddr = 0;
1209 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1210 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1211 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1212 i++;
1213 /* The .dynamic segment. */
1214 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1215 pSymFile->aPhdrs[i].p_flags = PF_R;
1216 pSymFile->aPhdrs[i].p_offset
1217 = pSymFile->aPhdrs[i].p_vaddr
1218 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1219 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1220 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1221 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1222 i++;
1223
1224 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1225
1226 /*
1227 * The dynamic section:
1228 */
1229 i = 0;
1230 pSymFile->aDyn[i].d_tag = DT_SONAME;
1231 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1232 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1233 i++;
1234 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1235 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1236 i++;
1237 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1238 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1239 i++;
1240 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1241 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1242 i++;
1243 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1244 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1245 i++;
1246 pSymFile->aDyn[i].d_tag = DT_NULL;
1247 i++;
1248 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1249# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1250
1251 /*
1252 * Symbol tables:
1253 */
1254 /** @todo gdb doesn't seem to really like this ... */
1255 i = 0;
1256 pSymFile->aSymbols[i].st_name = 0;
1257 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1258 pSymFile->aSymbols[i].st_value = 0;
1259 pSymFile->aSymbols[i].st_size = 0;
1260 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1261 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1262# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1263 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1264# endif
1265 i++;
1266
1267 pSymFile->aSymbols[i].st_name = 0;
1268 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1269 pSymFile->aSymbols[i].st_value = 0;
1270 pSymFile->aSymbols[i].st_size = 0;
1271 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1272 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1273 i++;
1274
1275 pSymFile->aSymbols[i].st_name = offStrTab;
1276 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1277# if 0
1278 pSymFile->aSymbols[i].st_shndx = iShText;
1279 pSymFile->aSymbols[i].st_value = 0;
1280# else
1281 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1282 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1283# endif
1284 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1285 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1286 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1287# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1288 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1289 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1290# endif
1291 i++;
1292
1293 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1294 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1295
1296 /*
1297 * The GDB JIT entry and informing GDB.
1298 */
1299 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1300# if 1
1301 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1302# else
1303 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1304# endif
1305
1306 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1307 RTCritSectEnter(&g_IemNativeGdbJitLock);
1308 pEhFrame->GdbJitEntry.pNext = NULL;
1309 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1310 if (__jit_debug_descriptor.pTail)
1311 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1312 else
1313 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1314 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1315 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1316
1317 /* Notify GDB: */
1318 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1319 __jit_debug_register_code();
1320 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1321 RTCritSectLeave(&g_IemNativeGdbJitLock);
1322
1323# else /* !IEMNATIVE_USE_GDB_JIT */
1324 RT_NOREF(pVCpu);
1325# endif /* !IEMNATIVE_USE_GDB_JIT */
1326
1327 return VINF_SUCCESS;
1328}
1329
1330# endif /* !RT_OS_WINDOWS */
1331#endif /* IN_RING3 */
1332
1333
1334/**
1335 * Adds another chunk to the executable memory allocator.
1336 *
1337 * This is used by the init code for the initial allocation and later by the
1338 * regular allocator function when it's out of memory.
1339 */
1340static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1341{
1342 /* Check that we've got room for growth. */
1343 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1344 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1345
1346 /* Allocate a chunk. */
1347#ifdef RT_OS_DARWIN
1348 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1349#else
1350 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1351#endif
1352 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1353
1354#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1355 int rc = VINF_SUCCESS;
1356#else
1357 /* Initialize the heap for the chunk. */
1358 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1359 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1360 AssertRC(rc);
1361 if (RT_SUCCESS(rc))
1362 {
1363 /*
1364 * We want the memory to be aligned on 64 byte, so the first time thru
1365 * here we do some exploratory allocations to see how we can achieve this.
1366 * On subsequent runs we only make an initial adjustment allocation, if
1367 * necessary.
1368 *
1369 * Since we own the heap implementation, we know that the internal block
1370 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1371 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1372 * to the size, align up by 64 bytes, and subtract 32 bytes.
1373 *
1374 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1375 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1376 * allocation to force subsequent allocations to return 64 byte aligned
1377 * user areas.
1378 */
1379 if (!pExecMemAllocator->cbHeapBlockHdr)
1380 {
1381 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1382 pExecMemAllocator->cbHeapAlignTweak = 64;
1383 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1384 32 /*cbAlignment*/);
1385 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1386
1387 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1388 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1389 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1390 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1391 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1392
1393 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1394 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1395 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1396 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1397 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1398
1399 RTHeapSimpleFree(hHeap, pvTest2);
1400 RTHeapSimpleFree(hHeap, pvTest1);
1401 }
1402 else
1403 {
1404 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1405 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1406 }
1407 if (RT_SUCCESS(rc))
1408#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1409 {
1410 /*
1411 * Add the chunk.
1412 *
1413 * This must be done before the unwind init so windows can allocate
1414 * memory from the chunk when using the alternative sub-allocator.
1415 */
1416 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1417#ifdef IN_RING3
1418 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1419#endif
1420#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1421 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1422#else
1423 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1424 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1425 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1426 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1427#endif
1428
1429 pExecMemAllocator->cChunks = idxChunk + 1;
1430 pExecMemAllocator->idxChunkHint = idxChunk;
1431
1432#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1433 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1435#else
1436 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1437 pExecMemAllocator->cbTotal += cbFree;
1438 pExecMemAllocator->cbFree += cbFree;
1439#endif
1440
1441#ifdef IN_RING3
1442 /*
1443 * Initialize the unwind information (this cannot really fail atm).
1444 * (This sets pvUnwindInfo.)
1445 */
1446 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1447 if (RT_SUCCESS(rc))
1448#endif
1449 {
1450 return VINF_SUCCESS;
1451 }
1452
1453#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1454 /* Just in case the impossible happens, undo the above: */
1455 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1456 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1457 pExecMemAllocator->cChunks = idxChunk;
1458 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1459 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1460 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1461 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1462#endif
1463 }
1464#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1465 }
1466#endif
1467 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1468 RT_NOREF(pVCpu);
1469 return rc;
1470}
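/* Worked example (informational only) of the 64 byte alignment trick used by
 * the RTHeapSimple configuration above and in iemExecMemAllocatorAlloc/Free:
 * with cbHeapBlockHdr = 32, a 100 byte request becomes
 * RT_ALIGN_32(100 + 32, 64) - 32 = 192 - 32 = 160 bytes, so the next block's
 * 32 byte header ends exactly on a 64 byte line and the following user area
 * starts 64 byte aligned again.
 */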
1471
1472
1473/**
1474 * Initializes the executable memory allocator for native recompilation on the
1475 * calling EMT.
1476 *
1477 * @returns VBox status code.
1478 * @param pVCpu The cross context virtual CPU structure of the calling
1479 * thread.
1480 * @param cbMax The max size of the allocator.
1481 * @param cbInitial The initial allocator size.
1482 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1483 * dependent).
1484 */
1485int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1486{
1487 /*
1488 * Validate input.
1489 */
1490 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1491 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1492 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1493 || cbChunk == 0
1494 || ( RT_IS_POWER_OF_TWO(cbChunk)
1495 && cbChunk >= _1M
1496 && cbChunk <= _256M
1497 && cbChunk <= cbMax),
1498 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1499 VERR_OUT_OF_RANGE);
1500
1501 /*
1502 * Adjust/figure out the chunk size.
1503 */
1504 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1505 {
1506 if (cbMax >= _256M)
1507 cbChunk = _64M;
1508 else
1509 {
1510 if (cbMax < _16M)
1511 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1512 else
1513 cbChunk = (uint32_t)cbMax / 4;
1514 if (!RT_IS_POWER_OF_TWO(cbChunk))
1515 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1516 }
1517 }
1518
1519 if (cbChunk > cbMax)
1520 cbMax = cbChunk;
1521 else
1522 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1523 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1524 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1525
1526 /*
1527 * Allocate and initialize the allocator instance.
1528 */
1529 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1530#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1531 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1532 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1533 cbNeeded += cbBitmap * cMaxChunks;
1534 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1535 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1539 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1540#endif
1541 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1542 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1543 VERR_NO_MEMORY);
1544 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1545 pExecMemAllocator->cbChunk = cbChunk;
1546 pExecMemAllocator->cMaxChunks = cMaxChunks;
1547 pExecMemAllocator->cChunks = 0;
1548 pExecMemAllocator->idxChunkHint = 0;
1549 pExecMemAllocator->cAllocations = 0;
1550 pExecMemAllocator->cbTotal = 0;
1551 pExecMemAllocator->cbFree = 0;
1552 pExecMemAllocator->cbAllocated = 0;
1553#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1554 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1555 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1556 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1557 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1558#endif
1559#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1560 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1561#endif
1562 for (uint32_t i = 0; i < cMaxChunks; i++)
1563 {
1564#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1565 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1566 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1567#else
1568 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1569#endif
1570 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1571#ifdef IN_RING0
1572 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1573#else
1574 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1575#endif
1576 }
1577 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1578
1579 /*
1580 * Do the initial allocations.
1581 */
1582 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1583 {
1584 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1585 AssertLogRelRCReturn(rc, rc);
1586 }
1587
1588 pExecMemAllocator->idxChunkHint = 0;
1589
1590 return VINF_SUCCESS;
1591}
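
/*
 * A minimal usage sketch for the allocator init above.  The 64 MB maximum and
 * 16 MB initial size are illustrative assumptions only; passing 0 for cbChunk
 * lets the function derive a chunk size from cbMax as described above.
 *
 * @code
 *     int rc = iemExecMemAllocatorInit(pVCpu, 64 * _1M, 16 * _1M, 0 /*cbChunk=default*/);
 *     AssertLogRelRCReturn(rc, rc);
 * @endcode
 */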
1592
1593
1594/*********************************************************************************************************************************
1595* Native Recompilation *
1596*********************************************************************************************************************************/
1597
1598
1599/**
1600 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1601 */
1602IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1603{
1604 pVCpu->iem.s.cInstructions += idxInstr;
1605 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1606}
1607
1608
1609/**
1610 * Used by TB code when it wants to raise a \#GP(0).
1611 */
1612IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1613{
1614 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1615#ifndef _MSC_VER
1616 return VINF_IEM_RAISED_XCPT; /* not reached */
1617#endif
1618}
1619
1620
1621/**
1622 * Used by TB code when detecting opcode changes.
1623 * @see iemThreadedFuncWorkerObsoleteTb
1624 */
1625IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1626{
1627 /* We set fSafeToFree to false because we're being called in the context
1628 of a TB callback function, which for native TBs means we cannot release
1629 the executable memory until we've returned our way back to iemTbExec, as
1630 that return path goes via the native code generated for the TB. */
1631 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1632 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1633 return VINF_IEM_REEXEC_BREAK;
1634}
1635
1636
1637/**
1638 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1639 */
1640IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1641{
1642 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1643 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1644 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1645 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1646 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1647 return VINF_IEM_REEXEC_BREAK;
1648}
1649
1650
1651/**
1652 * Used by TB code when we missed a PC check after a branch.
1653 */
1654IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1655{
1656 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1657 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1658 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1659 pVCpu->iem.s.pbInstrBuf));
1660 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1661 return VINF_IEM_REEXEC_BREAK;
1662}
1663
1664
1665
1666/*********************************************************************************************************************************
1667* Helpers: Segmented memory fetches and stores. *
1668*********************************************************************************************************************************/
1669
1670/**
1671 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1672 */
1673IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1674{
1675#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1676 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1677#else
1678 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1679#endif
1680}
1681
1682
1683/**
1684 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1685 * to 16 bits.
1686 */
1687IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1688{
1689#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1690 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1691#else
1692 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1693#endif
1694}
1695
1696
1697/**
1698 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1699 * to 32 bits.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1702{
1703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1704 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1705#else
1706 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1707#endif
1708}
1709
1710/**
1711 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1712 * to 64 bits.
1713 */
1714IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1715{
1716#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1717 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1718#else
1719 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1720#endif
1721}
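
/*
 * A worked example of the cast chains used by the _Sx_ helpers above, assuming
 * a fetched byte of 0x80: the value is sign-extended to the target width and
 * then returned zero-extended in a 64-bit register.
 *
 * @code
 *     uint8_t  const bFetched = 0x80;
 *     uint64_t const uRet16 = (uint64_t)(uint16_t)(int16_t)(int8_t)bFetched; // 0x000000000000ff80
 *     uint64_t const uRet32 = (uint64_t)(uint32_t)(int32_t)(int8_t)bFetched; // 0x00000000ffffff80
 *     uint64_t const uRet64 = (uint64_t)(int64_t)(int8_t)bFetched;           // 0xffffffffffffff80
 * @endcode
 */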
1722
1723
1724/**
1725 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1726 */
1727IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1728{
1729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1730 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1731#else
1732 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1733#endif
1734}
1735
1736
1737/**
1738 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1739 * to 32 bits.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1753 * to 64 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1767 */
1768IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1769{
1770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1771 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1772#else
1773 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1774#endif
1775}
1776
1777
1778/**
1779 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1807 */
1808IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1809{
1810#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1811 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1812#else
1813 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1814#endif
1815}
1816
1817
1818/**
1819 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1820 */
1821IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1822{
1823#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1824 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1825#else
1826 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1827#endif
1828}
1829
1830
1831/**
1832 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1833 */
1834IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1835{
1836#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1837 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1838#else
1839 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1840#endif
1841}
1842
1843
1844/**
1845 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1846 */
1847IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1848{
1849#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1850 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1851#else
1852 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1853#endif
1854}
1855
1856
1857
1858/**
1859 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1860 */
1861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1862{
1863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1864 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1865#else
1866 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1867#endif
1868}
1869
1870
1871/**
1872 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1873 */
1874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1875{
1876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1877 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1878#else
1879 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1880#endif
1881}
1882
1883
1884/**
1885 * Used by TB code to store a 32-bit selector value onto a generic stack.
1886 *
1887 * Intel CPUs don't write a whole dword, thus the special function.
1888 */
1889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1890{
1891#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1892 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1893#else
1894 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1895#endif
1896}
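
/*
 * Illustration of the special selector push behaviour handled above, assuming a
 * 32-bit push of selector 0x0023 onto a stack dword currently holding 0xdeadbeef:
 * recent Intel CPUs only move the low 16 bits, so the slot ends up as 0xdead0023
 * rather than 0x00000023.
 */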
1897
1898
1899/**
1900 * Used by TB code to push an unsigned 64-bit value onto a generic stack.
1901 */
1902IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1903{
1904#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1905 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1906#else
1907 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1908#endif
1909}
1910
1911
1912/**
1913 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1914 */
1915IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1916{
1917#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1918 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1919#else
1920 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1921#endif
1922}
1923
1924
1925/**
1926 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1927 */
1928IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1929{
1930#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1931 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1932#else
1933 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1934#endif
1935}
1936
1937
1938/**
1939 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1940 */
1941IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1942{
1943#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1944 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1945#else
1946 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1947#endif
1948}
1949
1950
1951
1952/*********************************************************************************************************************************
1953* Helpers: Flat memory fetches and stores. *
1954*********************************************************************************************************************************/
1955
1956/**
1957 * Used by TB code to load unsigned 8-bit data w/ flat address.
1958 * @note Zero extending the value to 64-bit to simplify assembly.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1963 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1964#else
1965 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1972 * to 16 bits.
1973 * @note Zero extending the value to 64-bit to simplify assembly.
1974 */
1975IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1976{
1977#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1978 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1979#else
1980 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1981#endif
1982}
1983
1984
1985/**
1986 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1987 * to 32 bits.
1988 * @note Zero extending the value to 64-bit to simplify assembly.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1993 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1994#else
1995 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1996#endif
1997}
1998
1999
2000/**
2001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2002 * to 64 bits.
2003 */
2004IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2005{
2006#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2007 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2008#else
2009 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2010#endif
2011}
2012
2013
2014/**
2015 * Used by TB code to load unsigned 16-bit data w/ flat address.
2016 * @note Zero extending the value to 64-bit to simplify assembly.
2017 */
2018IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2019{
2020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2021 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2022#else
2023 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2024#endif
2025}
2026
2027
2028/**
2029 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2030 * to 32 bits.
2031 * @note Zero extending the value to 64-bit to simplify assembly.
2032 */
2033IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2034{
2035#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2036 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2037#else
2038 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2039#endif
2040}
2041
2042
2043/**
2044 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2045 * to 64 bits.
2046 * @note Zero extending the value to 64-bit to simplify assembly.
2047 */
2048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2049{
2050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2051 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2052#else
2053 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2054#endif
2055}
2056
2057
2058/**
2059 * Used by TB code to load unsigned 32-bit data w/ flat address.
2060 * @note Zero extending the value to 64-bit to simplify assembly.
2061 */
2062IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2063{
2064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2065 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2066#else
2067 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2068#endif
2069}
2070
2071
2072/**
2073 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2074 * to 64 bits.
2075 * @note Zero extending the value to 64-bit to simplify assembly.
2076 */
2077IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2078{
2079#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2080 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2081#else
2082 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2083#endif
2084}
2085
2086
2087/**
2088 * Used by TB code to load unsigned 64-bit data w/ flat address.
2089 */
2090IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2091{
2092#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2093 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2094#else
2095 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2096#endif
2097}
2098
2099
2100/**
2101 * Used by TB code to store unsigned 8-bit data w/ flat address.
2102 */
2103IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2104{
2105#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2106 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2107#else
2108 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2109#endif
2110}
2111
2112
2113/**
2114 * Used by TB code to store unsigned 16-bit data w/ flat address.
2115 */
2116IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2117{
2118#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2119 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2120#else
2121 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2122#endif
2123}
2124
2125
2126/**
2127 * Used by TB code to store unsigned 32-bit data w/ flat address.
2128 */
2129IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2130{
2131#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2132 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2133#else
2134 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2135#endif
2136}
2137
2138
2139/**
2140 * Used by TB code to store unsigned 64-bit data w/ flat address.
2141 */
2142IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2143{
2144#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2145 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2146#else
2147 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2148#endif
2149}
2150
2151
2152
2153/**
2154 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2155 */
2156IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2157{
2158#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2159 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2160#else
2161 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2162#endif
2163}
2164
2165
2166/**
2167 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2168 */
2169IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2170{
2171#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2172 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2173#else
2174 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2175#endif
2176}
2177
2178
2179/**
2180 * Used by TB code to store a segment selector value onto a flat stack.
2181 *
2182 * Intel CPUs don't write a whole dword, thus the special function.
2183 */
2184IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2185{
2186#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2187 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2188#else
2189 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2190#endif
2191}
2192
2193
2194/**
2195 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2196 */
2197IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2198{
2199#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2200 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2201#else
2202 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2203#endif
2204}
2205
2206
2207/**
2208 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2213 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2214#else
2215 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2216#endif
2217}
2218
2219
2220/**
2221 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2222 */
2223IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2224{
2225#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2226 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2227#else
2228 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2229#endif
2230}
2231
2232
2233/**
2234 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2235 */
2236IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2237{
2238#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2239 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2240#else
2241 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2242#endif
2243}
2244
2245
2246
2247/*********************************************************************************************************************************
2248* Helpers: Segmented memory mapping. *
2249*********************************************************************************************************************************/
2250
2251/**
2252 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2253 */
2254IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2255 RTGCPTR GCPtrMem, uint8_t iSegReg))
2256{
2257#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2258 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2259#else
2260 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2261#endif
2262}
2263
2264
2265/**
2266 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2267 */
2268IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2269 RTGCPTR GCPtrMem, uint8_t iSegReg))
2270{
2271#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2272 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2273#else
2274 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2275#endif
2276}
2277
2278
2279/**
2280 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2281 */
2282IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2283 RTGCPTR GCPtrMem, uint8_t iSegReg))
2284{
2285#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2286 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2287#else
2288 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2289#endif
2290}
2291
2292
2293/**
2294 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2295 */
2296IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2297 RTGCPTR GCPtrMem, uint8_t iSegReg))
2298{
2299#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2300 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2301#else
2302 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2303#endif
2304}
2305
2306
2307/**
2308 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2309 */
2310IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2311 RTGCPTR GCPtrMem, uint8_t iSegReg))
2312{
2313#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2314 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2315#else
2316 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2317#endif
2318}
2319
2320
2321/**
2322 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2323 */
2324IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2325 RTGCPTR GCPtrMem, uint8_t iSegReg))
2326{
2327#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2328 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2329#else
2330 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2331#endif
2332}
2333
2334
2335/**
2336 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2337 */
2338IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2339 RTGCPTR GCPtrMem, uint8_t iSegReg))
2340{
2341#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2342 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2343#else
2344 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2345#endif
2346}
2347
2348
2349/**
2350 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2351 */
2352IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2353 RTGCPTR GCPtrMem, uint8_t iSegReg))
2354{
2355#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2356 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2357#else
2358 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2359#endif
2360}
2361
2362
2363/**
2364 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2365 */
2366IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2367 RTGCPTR GCPtrMem, uint8_t iSegReg))
2368{
2369#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2370 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2371#else
2372 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2373#endif
2374}
2375
2376
2377/**
2378 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2379 */
2380IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2381 RTGCPTR GCPtrMem, uint8_t iSegReg))
2382{
2383#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2384 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2385#else
2386 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2387#endif
2388}
2389
2390
2391/**
2392 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2393 */
2394IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2395 RTGCPTR GCPtrMem, uint8_t iSegReg))
2396{
2397#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2398 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2399#else
2400 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2401#endif
2402}
2403
2404
2405/**
2406 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2407 */
2408IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2409 RTGCPTR GCPtrMem, uint8_t iSegReg))
2410{
2411#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2412 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2413#else
2414 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2415#endif
2416}
2417
2418
2419/**
2420 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2421 */
2422IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2423 RTGCPTR GCPtrMem, uint8_t iSegReg))
2424{
2425#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2426 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2427#else
2428 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2429#endif
2430}
2431
2432
2433/**
2434 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2435 */
2436IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2437 RTGCPTR GCPtrMem, uint8_t iSegReg))
2438{
2439#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2440 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2441#else
2442 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2443#endif
2444}
2445
2446
2447/**
2448 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2449 */
2450IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2451 RTGCPTR GCPtrMem, uint8_t iSegReg))
2452{
2453#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2454 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2455#else
2456 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2457#endif
2458}
2459
2460
2461/**
2462 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2463 */
2464IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2465 RTGCPTR GCPtrMem, uint8_t iSegReg))
2466{
2467#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2468 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2469#else
2470 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2471#endif
2472}
2473
2474
2475/**
2476 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2477 */
2478IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2479 RTGCPTR GCPtrMem, uint8_t iSegReg))
2480{
2481#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2482 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2483#else
2484 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2485#endif
2486}
2487
2488
2489/*********************************************************************************************************************************
2490* Helpers: Flat memory mapping. *
2491*********************************************************************************************************************************/
2492
2493/**
2494 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2495 */
2496IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2497{
2498#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2499 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2500#else
2501 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2502#endif
2503}
2504
2505
2506/**
2507 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2508 */
2509IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2510{
2511#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2512 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2513#else
2514 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2515#endif
2516}
2517
2518
2519/**
2520 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2521 */
2522IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2523{
2524#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2525 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2526#else
2527 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2528#endif
2529}
2530
2531
2532/**
2533 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2534 */
2535IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2536{
2537#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2538 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2539#else
2540 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2541#endif
2542}
2543
2544
2545/**
2546 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2547 */
2548IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2549{
2550#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2551 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2552#else
2553 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2554#endif
2555}
2556
2557
2558/**
2559 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2560 */
2561IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2562{
2563#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2564 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2565#else
2566 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2567#endif
2568}
2569
2570
2571/**
2572 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2573 */
2574IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2575{
2576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2577 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2578#else
2579 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2580#endif
2581}
2582
2583
2584/**
2585 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2586 */
2587IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2588{
2589#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2590 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2591#else
2592 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2593#endif
2594}
2595
2596
2597/**
2598 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2599 */
2600IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2601{
2602#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2603 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2604#else
2605 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2606#endif
2607}
2608
2609
2610/**
2611 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2612 */
2613IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2614{
2615#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2616 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2617#else
2618 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2619#endif
2620}
2621
2622
2623/**
2624 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2625 */
2626IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2627{
2628#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2629 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2630#else
2631 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2632#endif
2633}
2634
2635
2636/**
2637 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2638 */
2639IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2640{
2641#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2642 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2643#else
2644 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2645#endif
2646}
2647
2648
2649/**
2650 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2651 */
2652IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2653{
2654#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2655 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2656#else
2657 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2658#endif
2659}
2660
2661
2662/**
2663 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2664 */
2665IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2666{
2667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2668 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2669#else
2670 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2671#endif
2672}
2673
2674
2675/**
2676 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2677 */
2678IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2679{
2680#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2681 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2682#else
2683 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2684#endif
2685}
2686
2687
2688/**
2689 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2690 */
2691IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2692{
2693#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2694 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2695#else
2696 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2697#endif
2698}
2699
2700
2701/**
2702 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2703 */
2704IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2705{
2706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2707 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2708#else
2709 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2710#endif
2711}
2712
2713
2714/*********************************************************************************************************************************
2715* Helpers: Commit, rollback & unmap *
2716*********************************************************************************************************************************/
2717
2718/**
2719 * Used by TB code to commit and unmap a read-write memory mapping.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2722{
2723 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2724}
2725
2726
2727/**
2728 * Used by TB code to commit and unmap a write-only memory mapping.
2729 */
2730IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2731{
2732 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2733}
2734
2735
2736/**
2737 * Used by TB code to commit and unmap a read-only memory mapping.
2738 */
2739IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2740{
2741 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2742}
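
/*
 * A conceptual C sketch of the map / modify / commit sequence that recompiled TB
 * code emits around the helpers above; real TBs invoke them from generated
 * native code rather than from C like this.
 *
 * @code
 *     uint8_t   bUnmapInfo = 0;
 *     uint32_t *pu32Dst    = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *     *pu32Dst += 1;                                      // the guest memory update
 *     iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo); // commit and release the mapping
 * @endcode
 */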
2743
2744
2745/**
2746 * Reinitializes the native recompiler state.
2747 *
2748 * Called before starting a new recompile job.
2749 */
2750static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2751{
2752 pReNative->cLabels = 0;
2753 pReNative->bmLabelTypes = 0;
2754 pReNative->cFixups = 0;
2755#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2756 pReNative->pDbgInfo->cEntries = 0;
2757#endif
2758 pReNative->pTbOrg = pTb;
2759 pReNative->cCondDepth = 0;
2760 pReNative->uCondSeqNo = 0;
2761 pReNative->uCheckIrqSeqNo = 0;
2762 pReNative->uTlbSeqNo = 0;
2763
2764 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2765#if IEMNATIVE_HST_GREG_COUNT < 32
2766 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2767#endif
2768 ;
2769 pReNative->Core.bmHstRegsWithGstShadow = 0;
2770 pReNative->Core.bmGstRegShadows = 0;
2771 pReNative->Core.bmVars = 0;
2772 pReNative->Core.bmStack = 0;
2773 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2774 pReNative->Core.u64ArgVars = UINT64_MAX;
2775
2776 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2777 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2778 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2779 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2780 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2781 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2782 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2783 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2784 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2785 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2786
2787 /* Full host register reinit: */
2788 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2789 {
2790 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2791 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2792 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2793 }
2794
2795 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2796 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2797#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2798 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2799#endif
2800#ifdef IEMNATIVE_REG_FIXED_TMP0
2801 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2802#endif
2803 );
2804 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2805 {
2806 fRegs &= ~RT_BIT_32(idxReg);
2807 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2808 }
2809
2810 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2811#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2812 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2813#endif
2814#ifdef IEMNATIVE_REG_FIXED_TMP0
2815 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2816#endif
2817 return pReNative;
2818}
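
/*
 * Note on the bmHstRegs seeding above: assuming IEMNATIVE_HST_GREG_COUNT is 16
 * (e.g. AMD64), the ~(RT_BIT(16) - 1U) term marks bits 16..31 (registers that do
 * not exist on the host) as permanently allocated, so the register allocator
 * only ever hands out real, non-fixed registers.
 */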
2819
2820
2821/**
2822 * Allocates and initializes the native recompiler state.
2823 *
2824 * This is called the first time an EMT wants to recompile something.
2825 *
2826 * @returns Pointer to the new recompiler state.
2827 * @param pVCpu The cross context virtual CPU structure of the calling
2828 * thread.
2829 * @param pTb The TB that's about to be recompiled.
2830 * @thread EMT(pVCpu)
2831 */
2832static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2833{
2834 VMCPU_ASSERT_EMT(pVCpu);
2835
2836 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2837 AssertReturn(pReNative, NULL);
2838
2839 /*
2840 * Try to allocate all the buffers and stuff we need.
2841 */
2842 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2843 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2844 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2845#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2846 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2847#endif
2848 if (RT_LIKELY( pReNative->pInstrBuf
2849 && pReNative->paLabels
2850 && pReNative->paFixups)
2851#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2852 && pReNative->pDbgInfo
2853#endif
2854 )
2855 {
2856 /*
2857 * Set the buffer & array sizes on success.
2858 */
2859 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2860 pReNative->cLabelsAlloc = _8K;
2861 pReNative->cFixupsAlloc = _16K;
2862#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2863 pReNative->cDbgInfoAlloc = _16K;
2864#endif
2865
2866 /* Other constant stuff: */
2867 pReNative->pVCpu = pVCpu;
2868
2869 /*
2870 * Done, just need to save it and reinit it.
2871 */
2872 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2873 return iemNativeReInit(pReNative, pTb);
2874 }
2875
2876 /*
2877 * Failed. Cleanup and return.
2878 */
2879 AssertFailed();
2880 RTMemFree(pReNative->pInstrBuf);
2881 RTMemFree(pReNative->paLabels);
2882 RTMemFree(pReNative->paFixups);
2883#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2884 RTMemFree(pReNative->pDbgInfo);
2885#endif
2886 RTMemFree(pReNative);
2887 return NULL;
2888}
2889
2890
2891/**
2892 * Creates a label.
2893 *
2894 * If the label does not yet have a defined position,
2895 * call iemNativeLabelDefine() later to set it.
2896 *
2897 * @returns Label ID. Throws VBox status code on failure, so no need to check
2898 * the return value.
2899 * @param pReNative The native recompile state.
2900 * @param enmType The label type.
2901 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2902 * label is not yet defined (default).
2903 * @param uData Data associated with the label. Only applicable to
2904 * certain types of labels. Default is zero.
2905 */
2906DECL_HIDDEN_THROW(uint32_t)
2907iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2908 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2909{
2910 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2911
2912 /*
2913 * Locate existing label definition.
2914 *
2915 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2916 * and uData is zero.
2917 */
2918 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2919 uint32_t const cLabels = pReNative->cLabels;
2920 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2921#ifndef VBOX_STRICT
2922 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2923 && offWhere == UINT32_MAX
2924 && uData == 0
2925#endif
2926 )
2927 {
2928#ifndef VBOX_STRICT
2929 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2930 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2931 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2932 if (idxLabel < pReNative->cLabels)
2933 return idxLabel;
2934#else
2935 for (uint32_t i = 0; i < cLabels; i++)
2936 if ( paLabels[i].enmType == enmType
2937 && paLabels[i].uData == uData)
2938 {
2939 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2940 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2941 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2942 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2943 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2944 return i;
2945 }
2946 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2947 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2948#endif
2949 }
2950
2951 /*
2952 * Make sure we've got room for another label.
2953 */
2954 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2955 { /* likely */ }
2956 else
2957 {
2958 uint32_t cNew = pReNative->cLabelsAlloc;
2959 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2960 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2961 cNew *= 2;
2962 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2963 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2964 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2965 pReNative->paLabels = paLabels;
2966 pReNative->cLabelsAlloc = cNew;
2967 }
2968
2969 /*
2970 * Define a new label.
2971 */
2972 paLabels[cLabels].off = offWhere;
2973 paLabels[cLabels].enmType = enmType;
2974 paLabels[cLabels].uData = uData;
2975 pReNative->cLabels = cLabels + 1;
2976
2977 Assert((unsigned)enmType < 64);
2978 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2979
2980 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2981 {
2982 Assert(uData == 0);
2983 pReNative->aidxUniqueLabels[enmType] = cLabels;
2984 }
2985
2986 if (offWhere != UINT32_MAX)
2987 {
2988#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2989 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2990 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2991#endif
2992 }
2993 return cLabels;
2994}
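
/*
 * A usage sketch for label creation, assuming kIemNativeLabelType_Return as an
 * illustrative single-instance label type: the label can be created as a forward
 * declaration and pinned down later once its position is known.
 *
 * @code
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *     // ... emit instructions that jump to idxLabel via fixups ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */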
2995
2996
2997/**
2998 * Defines the location of an existing label.
2999 *
3000 * @param pReNative The native recompile state.
3001 * @param idxLabel The label to define.
3002 * @param offWhere The position.
3003 */
3004DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3005{
3006 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3007 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3008 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3009 pLabel->off = offWhere;
3010#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3011 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3012 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3013#endif
3014}
3015
3016
3017/**
3018 * Looks up a label.
3019 *
3020 * @returns Label ID if found, UINT32_MAX if not.
3021 */
3022static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3023 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3024{
3025 Assert((unsigned)enmType < 64);
3026 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3027 {
3028 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3029 return pReNative->aidxUniqueLabels[enmType];
3030
3031 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3032 uint32_t const cLabels = pReNative->cLabels;
3033 for (uint32_t i = 0; i < cLabels; i++)
3034 if ( paLabels[i].enmType == enmType
3035 && paLabels[i].uData == uData
3036 && ( paLabels[i].off == offWhere
3037 || offWhere == UINT32_MAX
3038 || paLabels[i].off == UINT32_MAX))
3039 return i;
3040 }
3041 return UINT32_MAX;
3042}
3043
3044
3045/**
3046 * Adds a fixup.
3047 *
3048 * @throws VBox status code (int) on failure.
3049 * @param pReNative The native recompile state.
3050 * @param offWhere The instruction offset of the fixup location.
3051 * @param idxLabel The target label ID for the fixup.
3052 * @param enmType The fixup type.
3053 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3054 */
3055DECL_HIDDEN_THROW(void)
3056iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3057 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3058{
3059 Assert(idxLabel <= UINT16_MAX);
3060 Assert((unsigned)enmType <= UINT8_MAX);
3061
3062 /*
3063 * Make sure we've room.
3064 */
3065 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3066 uint32_t const cFixups = pReNative->cFixups;
3067 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3068 { /* likely */ }
3069 else
3070 {
3071 uint32_t cNew = pReNative->cFixupsAlloc;
3072 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3073 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3074 cNew *= 2;
3075 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3076 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3077 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3078 pReNative->paFixups = paFixups;
3079 pReNative->cFixupsAlloc = cNew;
3080 }
3081
3082 /*
3083 * Add the fixup.
3084 */
3085 paFixups[cFixups].off = offWhere;
3086 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3087 paFixups[cFixups].enmType = enmType;
3088 paFixups[cFixups].offAddend = offAddend;
3089 pReNative->cFixups = cFixups + 1;
3090}
3091
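/*
 * Usage sketch for the label/fixup pattern (illustrative comment only, not
 * compiled).  The label-creation helper and the concrete label and jump
 * helpers named below are assumptions based on the enums and emitters used
 * elsewhere in this file.
 *
 *      // Branch forward to code that hasn't been emitted yet:
 *      uint32_t const idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else);
 *      off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelElse);  // records a fixup via iemNativeAddFixup()
 *      // ... emit the 'then' code ...
 *      iemNativeLabelDefine(pReNative, idxLabelElse, off);           // resolve the label to the current offset
 *      // The final assembly pass patches every recorded fixup once all labels are defined.
 */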
3092
3093/**
3094 * Slow code path for iemNativeInstrBufEnsure.
3095 */
3096DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3097{
3098 /* Double the buffer size till we meet the request. */
3099 uint32_t cNew = pReNative->cInstrBufAlloc;
3100 AssertReturn(cNew > 0, NULL);
3101 do
3102 cNew *= 2;
3103 while (cNew < off + cInstrReq);
3104
3105 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3106#ifdef RT_ARCH_ARM64
3107 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3108#else
3109 uint32_t const cbMaxInstrBuf = _2M;
3110#endif
3111 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3112
3113 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3114 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3115
3116#ifdef VBOX_STRICT
3117 pReNative->offInstrBufChecked = off + cInstrReq;
3118#endif
3119 pReNative->cInstrBufAlloc = cNew;
3120 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3121}
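
/*
 * Growth example (illustrative): assuming an initial allocation of 256
 * instructions -- the real initial size is configured elsewhere and may
 * differ -- a request with off=250 and cInstrReq=16 needs 266 entries, so the
 * doubling loop goes 256 -> 512 and the buffer is reallocated to
 * 512 * sizeof(IEMNATIVEINSTR) bytes, subject to the 1MB (ARM64) / 2MB (AMD64)
 * cap asserted above.
 */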
3122
3123#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3124
3125/**
3126 * Grows the static debug info array used during recompilation.
3127 *
3128 * @returns Pointer to the new debug info block; throws VBox status code on
3129 * failure, so no need to check the return value.
3130 */
3131DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3132{
3133 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3134 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3135 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3136 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3137 pReNative->pDbgInfo = pDbgInfo;
3138 pReNative->cDbgInfoAlloc = cNew;
3139 return pDbgInfo;
3140}
3141
3142
3143/**
3144 * Adds a new uninitialized debug info entry, returning the pointer to it.
3145 */
3146DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3147{
3148 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3149 { /* likely */ }
3150 else
3151 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3152 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3153}
3154
3155
3156/**
3157 * Debug Info: Adds a native offset record, if necessary.
3158 */
3159static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3160{
3161 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3162
3163 /*
3164 * Search backwards to see if we've got a similar record already.
3165 */
3166 uint32_t idx = pDbgInfo->cEntries;
3167 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3168 while (idx-- > idxStop)
3169 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3170 {
3171 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3172 return;
3173 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3174 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3175 break;
3176 }
3177
3178 /*
3179 * Add it.
3180 */
3181 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3182 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3183 pEntry->NativeOffset.offNative = off;
3184}
3185
3186
3187/**
3188 * Debug Info: Record info about a label.
3189 */
3190static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3191{
3192 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3193 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3194 pEntry->Label.uUnused = 0;
3195 pEntry->Label.enmLabel = (uint8_t)enmType;
3196 pEntry->Label.uData = uData;
3197}
3198
3199
3200/**
3201 * Debug Info: Record info about a threaded call.
3202 */
3203static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3204{
3205 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3206 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3207 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3208 pEntry->ThreadedCall.uUnused = 0;
3209 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3210}
3211
3212
3213/**
3214 * Debug Info: Record info about a new guest instruction.
3215 */
3216static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3217{
3218 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3219 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3220 pEntry->GuestInstruction.uUnused = 0;
3221 pEntry->GuestInstruction.fExec = fExec;
3222}
3223
3224
3225/**
3226 * Debug Info: Record info about guest register shadowing.
3227 */
3228static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3229 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3230{
3231 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3232 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3233 pEntry->GuestRegShadowing.uUnused = 0;
3234 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3235 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3236 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3237}
3238
3239#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3240
3241
3242/*********************************************************************************************************************************
3243* Register Allocator *
3244*********************************************************************************************************************************/
3245
3246/**
3247 * Register parameter indexes (indexed by argument number).
3248 */
3249DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3250{
3251 IEMNATIVE_CALL_ARG0_GREG,
3252 IEMNATIVE_CALL_ARG1_GREG,
3253 IEMNATIVE_CALL_ARG2_GREG,
3254 IEMNATIVE_CALL_ARG3_GREG,
3255#if defined(IEMNATIVE_CALL_ARG4_GREG)
3256 IEMNATIVE_CALL_ARG4_GREG,
3257# if defined(IEMNATIVE_CALL_ARG5_GREG)
3258 IEMNATIVE_CALL_ARG5_GREG,
3259# if defined(IEMNATIVE_CALL_ARG6_GREG)
3260 IEMNATIVE_CALL_ARG6_GREG,
3261# if defined(IEMNATIVE_CALL_ARG7_GREG)
3262 IEMNATIVE_CALL_ARG7_GREG,
3263# endif
3264# endif
3265# endif
3266#endif
3267};
3268
3269/**
3270 * Call register masks indexed by argument count.
3271 */
3272DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3273{
3274 0,
3275 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3276 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3277 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3278 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3279 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3280#if defined(IEMNATIVE_CALL_ARG4_GREG)
3281 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3282 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3283# if defined(IEMNATIVE_CALL_ARG5_GREG)
3284 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3285 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3286# if defined(IEMNATIVE_CALL_ARG6_GREG)
3287 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3288 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3289 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3290# if defined(IEMNATIVE_CALL_ARG7_GREG)
3291 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3292 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3293 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3294# endif
3295# endif
3296# endif
3297#endif
3298};
3299
3300#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3301/**
3302 * BP offset of the stack argument slots.
3303 *
3304 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3305 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3306 */
3307DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3308{
3309 IEMNATIVE_FP_OFF_STACK_ARG0,
3310# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3311 IEMNATIVE_FP_OFF_STACK_ARG1,
3312# endif
3313# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3314 IEMNATIVE_FP_OFF_STACK_ARG2,
3315# endif
3316# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3317 IEMNATIVE_FP_OFF_STACK_ARG3,
3318# endif
3319};
3320AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3321#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3322
3323/**
3324 * Info about shadowed guest register values.
3325 * @see IEMNATIVEGSTREG
3326 */
3327static struct
3328{
3329 /** Offset in VMCPU. */
3330 uint32_t off;
3331 /** The field size. */
3332 uint8_t cb;
3333 /** Name (for logging). */
3334 const char *pszName;
3335} const g_aGstShadowInfo[] =
3336{
3337#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3338 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3339 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3340 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3341 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3342 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3343 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3344 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3345 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3346 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3347 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3348 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3349 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3350 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3351 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3352 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3353 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3354 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3355 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3356 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3357 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3358 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3359 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3360 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3361 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3362 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3363 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3364 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3365 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3366 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3367 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3368 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3369 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3370 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3371 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3372 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3373 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3374 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3375 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3376 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3377 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3378 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3379 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3380#undef CPUMCTX_OFF_AND_SIZE
3381};
3382AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3383
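/*
 * Example (illustrative comment only): the table above lets the emitters
 * handle any shadowed guest register generically, e.g. the VMCPU offset and
 * field size for the guest RIP can be looked up as:
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[kIemNativeGstReg_Pc].off;  // offset of cpum.GstCtx.rip in VMCPU
 *      uint8_t  const cbField = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;   // field size (8 bytes for rip)
 *      // Load/store emitters can then access pVCpu + offVCpu with the right width.
 */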
3384
3385/** Host CPU general purpose register names. */
3386DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3387{
3388#ifdef RT_ARCH_AMD64
3389 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3390#elif RT_ARCH_ARM64
3391 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3392 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3393#else
3394# error "port me"
3395#endif
3396};
3397
3398
3399DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3400 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3401{
3402 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3403
3404 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3405 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3406 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3407 return (uint8_t)idxReg;
3408}
3409
3410
3411/**
3412 * Tries to locate a suitable register in the given register mask.
3413 *
3414 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3415 * failed.
3416 *
3417 * @returns Host register number on success, returns UINT8_MAX on failure.
3418 */
3419static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3420{
3421 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3422 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3423 if (fRegs)
3424 {
3425 /** @todo pick better here: */
3426 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3427
3428 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3429 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3430 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3431 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3432
3433 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3434 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3435 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3436 return idxReg;
3437 }
3438 return UINT8_MAX;
3439}
3440
3441
3442/**
3443 * Locate a register, possibly freeing one up.
3444 *
3445 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3446 * failed.
3447 *
3448 * @returns Host register number on success. Returns UINT8_MAX if no registers
3449 *          found; the caller is supposed to deal with this and raise an
3450 * allocation type specific status code (if desired).
3451 *
3452 * @throws  VBox status code if we run into trouble spilling a variable or
3453 * recording debug info. Does NOT throw anything if we're out of
3454 * registers, though.
3455 */
3456static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3457 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3458{
3459 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3460 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3461
3462 /*
3463 * Try a freed register that's shadowing a guest register
3464 */
3465 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3466 if (fRegs)
3467 {
3468 unsigned const idxReg = (fPreferVolatile
3469 ? ASMBitFirstSetU32(fRegs)
3470 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3471 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
3472 - 1;
3473
3474 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3475 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3476 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3477 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3478
3479 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3480 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3481 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3482 return idxReg;
3483 }
3484
3485 /*
3486     * Try to free up a variable that's in a register.
3487     *
3488     * We do two rounds here: first evacuating variables that don't need to be
3489     * saved on the stack, then in the second round moving things to the stack.
3490 */
3491 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3492 {
3493 uint32_t fVars = pReNative->Core.bmVars;
3494 while (fVars)
3495 {
3496 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3497 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3498 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3499 && (RT_BIT_32(idxReg) & fRegMask)
3500 && ( iLoop == 0
3501 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3502 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3503 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3504 {
3505 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3506 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3507 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3508 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3509 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3510 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3511
3512 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3513 {
3514 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3515 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3516 }
3517
3518 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3519 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3520
3521 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3522 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3523 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3524 return idxReg;
3525 }
3526 fVars &= ~RT_BIT_32(idxVar);
3527 }
3528 }
3529
3530 return UINT8_MAX;
3531}
3532
3533
3534/**
3535 * Reassigns a variable to a different register specified by the caller.
3536 *
3537 * @returns The new code buffer position.
3538 * @param pReNative The native recompile state.
3539 * @param off The current code buffer position.
3540 * @param idxVar The variable index.
3541 * @param idxRegOld The old host register number.
3542 * @param idxRegNew The new host register number.
3543 * @param pszCaller The caller for logging.
3544 */
3545static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3546 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3547{
3548 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3549 RT_NOREF(pszCaller);
3550
3551 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3552
3553 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3554 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3555 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3556 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3557
3558 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3559 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3560 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3561 if (fGstRegShadows)
3562 {
3563 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3564 | RT_BIT_32(idxRegNew);
3565 while (fGstRegShadows)
3566 {
3567 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3568 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3569
3570 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3571 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3572 }
3573 }
3574
3575 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3576 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3577 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3578 return off;
3579}
3580
3581
3582/**
3583 * Moves a variable to a different register or spills it onto the stack.
3584 *
3585 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3586 * kinds can easily be recreated if needed later.
3587 *
3588 * @returns The new code buffer position.
3589 * @param pReNative The native recompile state.
3590 * @param off The current code buffer position.
3591 * @param idxVar The variable index.
3592 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3593 * call-volatile registers.
3594 */
3595static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3596 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3597{
3598 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3599 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3600 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3601
3602 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3603 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3604 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3605 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3606 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3607 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3608 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3609 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3610 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3611
3612
3613 /** @todo Add statistics on this.*/
3614 /** @todo Implement basic variable liveness analysis (python) so variables
3615     * can be freed immediately once no longer used. Without it we risk
3616     * needlessly trashing registers and stack space on dead variables. */
3617
3618 /*
3619 * First try move it to a different register, as that's cheaper.
3620 */
3621 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3622 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3623 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3624 if (fRegs)
3625 {
3626 /* Avoid using shadow registers, if possible. */
3627 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3628 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3629 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3630 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3631 }
3632
3633 /*
3634 * Otherwise we must spill the register onto the stack.
3635 */
3636 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3637 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3638 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3639 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3640
3641 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3642 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3643 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3644 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3645 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3646 return off;
3647}
3648
3649
3650/**
3651 * Allocates a temporary host general purpose register.
3652 *
3653 * This may emit code to save register content onto the stack in order to free
3654 * up a register.
3655 *
3656 * @returns The host register number; throws VBox status code on failure,
3657 * so no need to check the return value.
3658 * @param pReNative The native recompile state.
3659 * @param poff Pointer to the variable with the code buffer position.
3660 *                          This will be updated if we need to move a variable from
3661 * register to stack in order to satisfy the request.
3662 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3663 * registers (@c true, default) or the other way around
3664 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3665 */
3666DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3667{
3668 /*
3669     * Try to find a completely unused register, preferably a call-volatile one.
3670 */
3671 uint8_t idxReg;
3672 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3673 & ~pReNative->Core.bmHstRegsWithGstShadow
3674 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3675 if (fRegs)
3676 {
3677 if (fPreferVolatile)
3678 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3679 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3680 else
3681 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3682 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3683 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3684 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3685 }
3686 else
3687 {
3688 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3689 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3690 }
3691 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3692}
3693
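/*
 * Usage sketch (illustrative comment only): a typical scratch register
 * lifetime while emitting code, using the immediate-load emitter already
 * employed elsewhere in this file.
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0xdeadbeef));
 *      // ... emit instructions that use idxTmpReg as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */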
3694
3695/**
3696 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3697 * registers.
3698 *
3699 * @returns The host register number; throws VBox status code on failure,
3700 * so no need to check the return value.
3701 * @param pReNative The native recompile state.
3702 * @param poff Pointer to the variable with the code buffer position.
3703 *                          This will be updated if we need to move a variable from
3704 * register to stack in order to satisfy the request.
3705 * @param fRegMask Mask of acceptable registers.
3706 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3707 * registers (@c true, default) or the other way around
3708 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3709 */
3710DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3711 bool fPreferVolatile /*= true*/)
3712{
3713 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3714 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3715
3716 /*
3717     * Try to find a completely unused register, preferably a call-volatile one.
3718 */
3719 uint8_t idxReg;
3720 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3721 & ~pReNative->Core.bmHstRegsWithGstShadow
3722 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3723 & fRegMask;
3724 if (fRegs)
3725 {
3726 if (fPreferVolatile)
3727 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3728 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3729 else
3730 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3731 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3732 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3733 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3734 }
3735 else
3736 {
3737 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3738 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3739 }
3740 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3741}
3742
3743
3744/**
3745 * Allocates a temporary register for loading an immediate value into.
3746 *
3747 * This will emit code to load the immediate, unless there happens to be an
3748 * unused register with the value already loaded.
3749 *
3750 * The caller must not modify the returned register; it must be considered
3751 * read-only. Free it using iemNativeRegFreeTmpImm().
3752 *
3753 * @returns The host register number; throws VBox status code on failure, so no
3754 * need to check the return value.
3755 * @param pReNative The native recompile state.
3756 * @param poff Pointer to the variable with the code buffer position.
3757 * @param uImm The immediate value that the register must hold upon
3758 * return.
3759 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3760 * registers (@c true, default) or the other way around
3761 * (@c false).
3762 *
3763 * @note Reusing immediate values has not been implemented yet.
3764 */
3765DECL_HIDDEN_THROW(uint8_t)
3766iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3767{
3768 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3769 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3770 return idxReg;
3771}
3772
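/*
 * Usage sketch (illustrative comment only): load a constant into a read-only
 * scratch register and release it when the emitted code no longer needs it.
 *
 *      uint8_t const idxRegLimit = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... emit compares/loads that read idxRegLimit ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegLimit);
 */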
3773
3774/**
3775 * Marks host register @a idxHstReg as containing a shadow copy of guest
3776 * register @a enmGstReg.
3777 *
3778 * ASSUMES that the caller has made sure @a enmGstReg is not associated with any
3779 * host register before calling.
3780 */
3781DECL_FORCE_INLINE(void)
3782iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3783{
3784 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3785 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3786 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3787
3788 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3789 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3790 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3791 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3792#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3793 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3794 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3795#else
3796 RT_NOREF(off);
3797#endif
3798}
3799
3800
3801/**
3802 * Clear any guest register shadow claims from @a idxHstReg.
3803 *
3804 * The register does not need to be shadowing any guest registers.
3805 */
3806DECL_FORCE_INLINE(void)
3807iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3808{
3809 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3810 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3811 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3812 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3813 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3814
3815#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3816 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3817 if (fGstRegs)
3818 {
3819 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3820 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3821 while (fGstRegs)
3822 {
3823 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3824 fGstRegs &= ~RT_BIT_64(iGstReg);
3825 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3826 }
3827 }
3828#else
3829 RT_NOREF(off);
3830#endif
3831
3832 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3833 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3834 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3835}
3836
3837
3838/**
3839 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3840 * and global overview flags.
3841 */
3842DECL_FORCE_INLINE(void)
3843iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3844{
3845 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3846 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3847 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3848 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3849 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3850 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3851 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3852
3853#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3854 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3855 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3856#else
3857 RT_NOREF(off);
3858#endif
3859
3860 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3861 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3862 if (!fGstRegShadowsNew)
3863 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3864 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3865}
3866
3867
3868/**
3869 * Clear any guest register shadow claim for @a enmGstReg.
3870 */
3871DECL_FORCE_INLINE(void)
3872iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3873{
3874 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3875 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3876 {
3877 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3878 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3879 }
3880}
3881
3882
3883/**
3884 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3885 * as the new shadow of it.
3886 */
3887DECL_FORCE_INLINE(void)
3888iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3889 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3890{
3891 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3892 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3893 {
3894 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3895 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3896 return;
3897 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3898 }
3899 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3900}
3901
3902
3903/**
3904 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3905 * to @a idxRegTo.
3906 */
3907DECL_FORCE_INLINE(void)
3908iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3909 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3910{
3911 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3912 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
3913 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3914 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3915 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3916 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3917 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3918 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3919 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3920
3921 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3922 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3923 if (!fGstRegShadowsFrom)
3924 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3925 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3926 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3927 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3928#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3929 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3930 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3931#else
3932 RT_NOREF(off);
3933#endif
3934}
3935
3936
3937/**
3938 * Allocates a temporary host general purpose register for keeping a guest
3939 * register value.
3940 *
3941 * We may already have a register holding the guest register value; if not,
3942 * code will be emitted to load it. Code may also be emitted if we have to
3943 * free up a register to satisfy the request.
3944 *
3945 * @returns The host register number; throws VBox status code on failure, so no
3946 * need to check the return value.
3947 * @param pReNative The native recompile state.
3948 * @param poff Pointer to the variable with the code buffer
3949 *                          position. This will be updated if we need to move a
3950 * variable from register to stack in order to satisfy
3951 * the request.
3952 * @param   enmGstReg       The guest register that is to be updated.
3953 * @param enmIntendedUse How the caller will be using the host register.
3954 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3955 * register is okay (default). The ASSUMPTION here is
3956 * that the caller has already flushed all volatile
3957 * registers, so this is only applied if we allocate a
3958 * new register.
3959 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3960 */
3961DECL_HIDDEN_THROW(uint8_t)
3962iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3963 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3964 bool fNoVolatileRegs /*= false*/)
3965{
3966 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3967#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3968 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3969#endif
3970 uint32_t const fRegMask = !fNoVolatileRegs
3971 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3972 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3973
3974 /*
3975 * First check if the guest register value is already in a host register.
3976 */
3977 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3978 {
3979 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3980 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3981 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3982 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3983
3984 /* It's not supposed to be allocated... */
3985 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3986 {
3987 /*
3988             * If the register will trash the guest shadow copy, try to find a
3989 * completely unused register we can use instead. If that fails,
3990 * we need to disassociate the host reg from the guest reg.
3991 */
3992 /** @todo would be nice to know if preserving the register is in any way helpful. */
3993            /* If the purpose is calculations, try to duplicate the register value as
3994 we'll be clobbering the shadow. */
3995 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3996 && ( ~pReNative->Core.bmHstRegs
3997 & ~pReNative->Core.bmHstRegsWithGstShadow
3998 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3999 {
4000 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4001
4002 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4003
4004 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4005 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4006 g_apszIemNativeHstRegNames[idxRegNew]));
4007 idxReg = idxRegNew;
4008 }
4009 /* If the current register matches the restrictions, go ahead and allocate
4010 it for the caller. */
4011 else if (fRegMask & RT_BIT_32(idxReg))
4012 {
4013 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4014 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4015 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4016 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4017 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4018 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4019 else
4020 {
4021 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4022 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4023 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4024 }
4025 }
4026 /* Otherwise, allocate a register that satisfies the caller and transfer
4027 the shadowing if compatible with the intended use. (This basically
4028               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4029 else
4030 {
4031 Assert(fNoVolatileRegs);
4032 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4033 !fNoVolatileRegs
4034 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4035 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4036 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4037 {
4038 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4039                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4040 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4041 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4042 }
4043 else
4044 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4045 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4046 g_apszIemNativeHstRegNames[idxRegNew]));
4047 idxReg = idxRegNew;
4048 }
4049 }
4050 else
4051 {
4052 /*
4053 * Oops. Shadowed guest register already allocated!
4054 *
4055             * Allocate a new register, copy the value over and, if updating, move the
4056             * guest shadow copy assignment to the new register.
4057 */
4058 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4059 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4060 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4061 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4062
4063 /** @todo share register for readonly access. */
4064 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4065 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4066
4067 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4068 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4069
4070 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4071 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4072 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4073 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4074 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4075 else
4076 {
4077 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4078 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4079 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4080 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4081 }
4082 idxReg = idxRegNew;
4083 }
4084 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4085
4086#ifdef VBOX_STRICT
4087 /* Strict builds: Check that the value is correct. */
4088 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4089#endif
4090
4091 return idxReg;
4092 }
4093
4094 /*
4095     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4096 */
4097 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4098
4099 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4100 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4101
4102 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4103 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4104 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4105 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4106
4107 return idxRegNew;
4108}
4109
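/*
 * Usage sketch (illustrative comment only): fetch guest RAX into a host
 * register for updating.  The eventual writeback step mentioned below is an
 * assumption about how callers typically complete the sequence; only the
 * calls shown are documented in this file.
 *
 *      uint8_t const idxGprReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying idxGprReg; the shadow still maps guest rax to it ...
 *      // ... a later flush writes the value back to cpum.GstCtx.rax ...
 *      iemNativeRegFreeTmp(pReNative, idxGprReg);
 */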
4110
4111/**
4112 * Allocates a temporary host general purpose register that already holds the
4113 * given guest register value.
4114 *
4115 * The use case for this function is code paths where the shadowing state cannot
4116 * be modified due to branching and such. This will fail if we don't have a
4117 * current shadow copy handy or if it's incompatible. The only code that will
4118 * be emitted here is value checking code in strict builds.
4119 *
4120 * The intended use can only be readonly!
4121 *
4122 * @returns The host register number, UINT8_MAX if not present.
4123 * @param pReNative The native recompile state.
4124 * @param poff Pointer to the instruction buffer offset.
4125 * Will be updated in strict builds if a register is
4126 * found.
4127 * @param   enmGstReg       The guest register that is to be accessed (read-only).
4128 * @note In strict builds, this may throw instruction buffer growth failures.
4129 * Non-strict builds will not throw anything.
4130 * @sa iemNativeRegAllocTmpForGuestReg
4131 */
4132DECL_HIDDEN_THROW(uint8_t)
4133iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4134{
4135 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4136
4137 /*
4138 * First check if the guest register value is already in a host register.
4139 */
4140 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4141 {
4142 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4143 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4144 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4145 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4146
4147 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4148 {
4149 /*
4150             * We only do readonly use here, so this is easy compared to the other
4151 * variant of this code.
4152 */
4153 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4154 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4155 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4156 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4157 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4158
4159#ifdef VBOX_STRICT
4160 /* Strict builds: Check that the value is correct. */
4161 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4162#else
4163 RT_NOREF(poff);
4164#endif
4165 return idxReg;
4166 }
4167 }
4168
4169 return UINT8_MAX;
4170}
4171
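/*
 * Usage sketch (illustrative comment only): opportunistically reuse an
 * existing shadow copy and fall back gracefully when the guest value is not
 * register-resident.
 *
 *      uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxRegPc != UINT8_MAX)
 *      {
 *          // ... emit code that only reads idxRegPc ...
 *          iemNativeRegFreeTmp(pReNative, idxRegPc);
 *      }
 *      // else: load the value some other way or take a slower path.
 */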
4172
4173DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
4174
4175
4176/**
4177 * Allocates argument registers for a function call.
4178 *
4179 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4180 * need to check the return value.
4181 * @param pReNative The native recompile state.
4182 * @param off The current code buffer offset.
4183 * @param cArgs The number of arguments the function call takes.
4184 */
4185DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4186{
4187 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4188 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4189 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4190 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4191
4192 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4193 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4194 else if (cArgs == 0)
4195 return true;
4196
4197 /*
4198     * Do we get lucky and all registers are free and not shadowing anything?
4199 */
4200 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4201 for (uint32_t i = 0; i < cArgs; i++)
4202 {
4203 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4204 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4205 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4206 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4207 }
4208 /*
4209 * Okay, not lucky so we have to free up the registers.
4210 */
4211 else
4212 for (uint32_t i = 0; i < cArgs; i++)
4213 {
4214 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4215 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4216 {
4217 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4218 {
4219 case kIemNativeWhat_Var:
4220 {
4221 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4222 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
4223 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4224 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4225 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4226
4227 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4228 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4229 else
4230 {
4231 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4232 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4233 }
4234 break;
4235 }
4236
4237 case kIemNativeWhat_Tmp:
4238 case kIemNativeWhat_Arg:
4239 case kIemNativeWhat_rc:
4240 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4241 default:
4242 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4243 }
4244
4245 }
4246 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4247 {
4248 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4249 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4250 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4251 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4252 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4253 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4254 }
4255 else
4256 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4257 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4258 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4259 }
4260 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4261 return true;
4262}
4263
4264
4265DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4266
4267
4268#if 0
4269/**
4270 * Frees a register assignment of any type.
4271 *
4272 * @param pReNative The native recompile state.
4273 * @param idxHstReg The register to free.
4274 *
4275 * @note Does not update variables.
4276 */
4277DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4278{
4279 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4280 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4281 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4282 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4283 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4284 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4285 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4286 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4287 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4288 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4289 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4290 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4291 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4292 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4293
4294 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4295 /* no flushing, right:
4296 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4297 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4298 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4299 */
4300}
4301#endif
4302
4303
4304/**
4305 * Frees a temporary register.
4306 *
4307 * Any shadow copies of guest registers assigned to the host register will not
4308 * be flushed by this operation.
4309 */
4310DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4311{
4312 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4313 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4314 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4315 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4316 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4317}
4318
4319
4320/**
4321 * Frees a temporary immediate register.
4322 *
4323 * It is assumed that the caller has not modified the register, so it still holds
4324 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4325 */
4326DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4327{
4328 iemNativeRegFreeTmp(pReNative, idxHstReg);
4329}
4330
4331
4332/**
4333 * Frees a register assigned to a variable.
4334 *
4335 * The register will be disassociated from the variable.
4336 */
4337DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4338{
4339 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4340 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4341 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4342 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4343 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4344
4345 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4346 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4347 if (!fFlushShadows)
4348 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4349 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4350 else
4351 {
4352 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4353 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4354 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4355 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4356 uint64_t fGstRegShadows = fGstRegShadowsOld;
4357 while (fGstRegShadows)
4358 {
4359 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4360 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4361
4362 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4363 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4364 }
4365 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4366 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4367 }
4368}
4369
4370
4371/**
4372 * Called right before emitting a call instruction to move anything important
4373 * out of call-volatile registers, free and flush the call-volatile registers,
4374 * optionally freeing argument variables.
4375 *
4376 * @returns New code buffer offset; throws VBox status code on error.
4377 * @param pReNative The native recompile state.
4378 * @param off The code buffer offset.
4379 * @param cArgs The number of arguments the function call takes.
4380 *                  It is presumed that the host register part of these has
4381 * been allocated as such already and won't need moving,
4382 * just freeing.
4383 * @param fKeepVars Mask of variables that should keep their register
4384 * assignments. Caller must take care to handle these.
4385 */
4386DECL_HIDDEN_THROW(uint32_t)
4387iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4388{
4389 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4390
4391 /* fKeepVars will reduce this mask. */
4392 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4393
4394 /*
4395 * Move anything important out of volatile registers.
4396 */
4397 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4398 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4399 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4400#ifdef IEMNATIVE_REG_FIXED_TMP0
4401 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4402#endif
4403 & ~g_afIemNativeCallRegs[cArgs];
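    /* I.e. fRegsToMove is the call-volatile set minus the fixed temporary
       register (when one is reserved) and minus the registers already earmarked
       as arguments for this call; the argument registers only need freeing,
       not moving, as noted in the function documentation above. */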
4404
4405 fRegsToMove &= pReNative->Core.bmHstRegs;
4406 if (!fRegsToMove)
4407 { /* likely */ }
4408 else
4409 {
4410 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4411 while (fRegsToMove != 0)
4412 {
4413 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4414 fRegsToMove &= ~RT_BIT_32(idxReg);
4415
4416 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4417 {
4418 case kIemNativeWhat_Var:
4419 {
4420 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4421 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4422 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4423 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4424 if (!(RT_BIT_32(idxVar) & fKeepVars))
4425 {
4426 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4427 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4428 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4429 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4430 else
4431 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4432 }
4433 else
4434 fRegsToFree &= ~RT_BIT_32(idxReg);
4435 continue;
4436 }
4437
4438 case kIemNativeWhat_Arg:
4439 AssertMsgFailed(("What?!?: %u\n", idxReg));
4440 continue;
4441
4442 case kIemNativeWhat_rc:
4443 case kIemNativeWhat_Tmp:
4444 AssertMsgFailed(("Missing free: %u\n", idxReg));
4445 continue;
4446
4447 case kIemNativeWhat_FixedTmp:
4448 case kIemNativeWhat_pVCpuFixed:
4449 case kIemNativeWhat_pCtxFixed:
4450 case kIemNativeWhat_FixedReserved:
4451 case kIemNativeWhat_Invalid:
4452 case kIemNativeWhat_End:
4453 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4454 }
4455 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4456 }
4457 }
4458
4459 /*
4460 * Do the actual freeing.
4461 */
4462 if (pReNative->Core.bmHstRegs & fRegsToFree)
4463 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4464 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4465 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4466
4467 /* If there are guest register shadows in any call-volatile register, we
4468       have to clear the corresponding guest register masks for each register. */
4469 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4470 if (fHstRegsWithGstShadow)
4471 {
4472 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4473 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4474 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4475 do
4476 {
4477 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4478 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4479
4480 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4481 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4482 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4483 } while (fHstRegsWithGstShadow != 0);
4484 }
4485
4486 return off;
4487}
4488
4489
4490/**
4491 * Flushes a set of guest register shadow copies.
4492 *
4493 * This is usually done after calling a threaded function or a C-implementation
4494 * of an instruction.
4495 *
4496 * @param pReNative The native recompile state.
4497 * @param fGstRegs Set of guest registers to flush.
4498 */
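/*
 * Illustrative example (register indices are made up): if fGstRegs requests
 * flushing of guest registers 2 and 3, host register 5 is the one shadowing
 * them, and other shadow copies remain elsewhere, the partial path below
 * clears bits 2+3 in bmGstRegShadows and in aHstRegs[5].fGstRegShadows, and
 * drops host register 5 from bmHstRegsWithGstShadow once its shadow mask
 * reaches zero.
 */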
4499DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4500{
4501 /*
4502 * Reduce the mask by what's currently shadowed
4503 */
4504 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4505 fGstRegs &= bmGstRegShadowsOld;
4506 if (fGstRegs)
4507 {
4508 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4509 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4510 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4511 if (bmGstRegShadowsNew)
4512 {
4513 /*
4514 * Partial.
4515 */
4516 do
4517 {
4518 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4519 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4520 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4521 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4522 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4523
4524 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4525 fGstRegs &= ~fInThisHstReg;
4526 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4527 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4528 if (!fGstRegShadowsNew)
4529 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4530 } while (fGstRegs != 0);
4531 }
4532 else
4533 {
4534 /*
4535 * Clear all.
4536 */
4537 do
4538 {
4539 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4540 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4541 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4542 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4543 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4544
4545 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4546 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4547 } while (fGstRegs != 0);
4548 pReNative->Core.bmHstRegsWithGstShadow = 0;
4549 }
4550 }
4551}
4552
4553
4554/**
4555 * Flushes guest register shadow copies held by a set of host registers.
4556 *
4557 * This is used with the TLB lookup code for ensuring that we don't carry on
4558 * with any guest shadows in volatile registers, as these will get corrupted by
4559 * a TLB miss.
4560 *
4561 * @param pReNative The native recompile state.
4562 * @param fHstRegs Set of host registers to flush guest shadows for.
4563 */
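/*
 * Note: this is the host-register-keyed counterpart of
 * iemNativeRegFlushGuestShadows() above; the shadow copies are merely
 * forgotten, no guest state is written back here.
 */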
4564DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4565{
4566 /*
4567 * Reduce the mask by what's currently shadowed.
4568 */
4569 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4570 fHstRegs &= bmHstRegsWithGstShadowOld;
4571 if (fHstRegs)
4572 {
4573 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4574 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4575 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4576 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4577 if (bmHstRegsWithGstShadowNew)
4578 {
4579 /*
4580 * Partial (likely).
4581 */
4582 uint64_t fGstShadows = 0;
4583 do
4584 {
4585 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4586 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4587 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4588 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4589
4590 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4591 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4592 fHstRegs &= ~RT_BIT_32(idxHstReg);
4593 } while (fHstRegs != 0);
4594 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4595 }
4596 else
4597 {
4598 /*
4599 * Clear all.
4600 */
4601 do
4602 {
4603 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4604 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4605 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4606 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4607
4608 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4609 fHstRegs &= ~RT_BIT_32(idxHstReg);
4610 } while (fHstRegs != 0);
4611 pReNative->Core.bmGstRegShadows = 0;
4612 }
4613 }
4614}
4615
4616
4617/**
4618 * Restores guest shadow copies in volatile registers.
4619 *
4620 * This is used after calling a helper function (think TLB miss) to restore the
4621 * register state of volatile registers.
4622 *
4623 * @param pReNative The native recompile state.
4624 * @param off The code buffer offset.
4625 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4626 * be active (allocated) w/o asserting. Hack.
4627 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4628 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4629 */
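/*
 * Note: when a host register shadows several guest registers they necessarily
 * hold the same value, so reloading just the first shadowed guest register in
 * the loop below is sufficient to restore the host register content.
 */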
4630DECL_HIDDEN_THROW(uint32_t)
4631iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4632{
4633 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4634 if (fHstRegs)
4635 {
4636 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4637 do
4638 {
4639 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4640
4641            /* It's not fatal if a register is active holding a variable that
4642               shadows a guest register, ASSUMING all pending guest register
4643               writes were flushed prior to the helper call. However, we'll be
4644               emitting duplicate restores, so it wastes code space. */
4645 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4646 RT_NOREF(fHstRegsActiveShadows);
4647
4648 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4649 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4650 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4651 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4652
4653 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4654 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4655
4656 fHstRegs &= ~RT_BIT_32(idxHstReg);
4657 } while (fHstRegs != 0);
4658 }
4659 return off;
4660}
4661
4662
4663/**
4664 * Flushes delayed write of a specific guest register.
4665 *
4666 * This must be called prior to calling CImpl functions and any helpers that use
4667 * the guest state (like raising exceptions) and such.
4668 *
4669 * This optimization has not yet been implemented. The first target would be
4670 * RIP updates, since these are the most common ones.
4671 */
4672DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4673 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4674{
4675 RT_NOREF(pReNative, enmClass, idxReg);
4676 return off;
4677}
4678
4679
4680/**
4681 * Flushes any delayed guest register writes.
4682 *
4683 * This must be called prior to calling CImpl functions and any helpers that use
4684 * the guest state (like raising exceptions) and such.
4685 *
4686 * This optimization has not yet been implemented. The first target would be
4687 * RIP updates, since these are the most common ones.
4688 */
4689DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4690{
4691 RT_NOREF(pReNative, off);
4692 return off;
4693}
4694
4695
4696#ifdef VBOX_STRICT
4697/**
4698 * Does internal register allocator sanity checks.
4699 */
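/*
 * The check runs in both directions: first the per-host-register shadow masks
 * are accumulated and compared against bmGstRegShadows, then the guest-to-host
 * index array is walked to verify that each entry points back at a host
 * register which claims to shadow it.
 */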
4700static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
4701{
4702 /*
4703 * Iterate host registers building a guest shadowing set.
4704 */
4705 uint64_t bmGstRegShadows = 0;
4706 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
4707 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
4708 while (bmHstRegsWithGstShadow)
4709 {
4710 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
4711 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4712 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4713
4714 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4715 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
4716 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
4717 bmGstRegShadows |= fThisGstRegShadows;
4718 while (fThisGstRegShadows)
4719 {
4720 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
4721 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
4722 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
4723 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
4724 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
4725 }
4726 }
4727 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
4728 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
4729 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
4730
4731 /*
4732 * Now the other way around, checking the guest to host index array.
4733 */
4734 bmHstRegsWithGstShadow = 0;
4735 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
4736 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4737 while (bmGstRegShadows)
4738 {
4739 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4740 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4741 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4742
4743 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4744 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4745 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4746 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4747 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4748 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4749 }
4750 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4751 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4752 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4753}
4754#endif
4755
4756
4757/*********************************************************************************************************************************
4758* Code Emitters (larger snippets) *
4759*********************************************************************************************************************************/
4760
4761/**
4762 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4763 * extending to 64-bit width.
4764 *
4765 * @returns New code buffer offset on success; throws VBox status code on error.
4766 * @param   pReNative   The native recompile state.
4767 * @param off The current code buffer position.
4768 * @param idxHstReg The host register to load the guest register value into.
4769 * @param enmGstReg The guest register to load.
4770 *
4771 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4772 * that is something the caller needs to do if applicable.
4773 */
4774DECL_HIDDEN_THROW(uint32_t)
4775iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4776{
4777 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4778 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4779
4780 switch (g_aGstShadowInfo[enmGstReg].cb)
4781 {
4782 case sizeof(uint64_t):
4783 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4784 case sizeof(uint32_t):
4785 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4786 case sizeof(uint16_t):
4787 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4788#if 0 /* not present in the table. */
4789 case sizeof(uint8_t):
4790 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4791#endif
4792 default:
4793 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4794 }
4795}
4796
4797
4798#ifdef VBOX_STRICT
4799/**
4800 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
4801 *
4802 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4803 * Trashes EFLAGS on AMD64.
4804 */
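/*
 * The AMD64 variant rotates the upper half into the low 32 bits, tests it and
 * hits an int3 if anything is set, then rotates back so the register value is
 * preserved; the ARM64 variant shifts the upper 32 bits into the fixed
 * temporary register and uses cbz/brk instead.
 */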
4805static uint32_t
4806iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4807{
4808# ifdef RT_ARCH_AMD64
4809 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4810
4811 /* rol reg64, 32 */
4812 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4813 pbCodeBuf[off++] = 0xc1;
4814 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4815 pbCodeBuf[off++] = 32;
4816
4817 /* test reg32, ffffffffh */
4818 if (idxReg >= 8)
4819 pbCodeBuf[off++] = X86_OP_REX_B;
4820 pbCodeBuf[off++] = 0xf7;
4821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4822 pbCodeBuf[off++] = 0xff;
4823 pbCodeBuf[off++] = 0xff;
4824 pbCodeBuf[off++] = 0xff;
4825 pbCodeBuf[off++] = 0xff;
4826
4827 /* je/jz +1 */
4828 pbCodeBuf[off++] = 0x74;
4829 pbCodeBuf[off++] = 0x01;
4830
4831 /* int3 */
4832 pbCodeBuf[off++] = 0xcc;
4833
4834 /* rol reg64, 32 */
4835 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4836 pbCodeBuf[off++] = 0xc1;
4837 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4838 pbCodeBuf[off++] = 32;
4839
4840# elif defined(RT_ARCH_ARM64)
4841 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4842 /* lsr tmp0, reg64, #32 */
4843 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4844 /* cbz tmp0, +1 */
4845 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4846 /* brk #0x1100 */
4847 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4848
4849# else
4850# error "Port me!"
4851# endif
4852 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4853 return off;
4854}
4855#endif /* VBOX_STRICT */
4856
4857
4858#ifdef VBOX_STRICT
4859/**
4860 * Emitting code that checks that the content of register @a idxReg is the same
4861 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
4862 * instruction if that's not the case.
4863 *
4864 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4865 * Trashes EFLAGS on AMD64.
4866 */
4867static uint32_t
4868iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4869{
4870# ifdef RT_ARCH_AMD64
4871 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4872
4873 /* cmp reg, [mem] */
4874 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4875 {
4876 if (idxReg >= 8)
4877 pbCodeBuf[off++] = X86_OP_REX_R;
4878 pbCodeBuf[off++] = 0x38;
4879 }
4880 else
4881 {
4882 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4883 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4884 else
4885 {
4886 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4887 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4888 else
4889 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4890 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4891 if (idxReg >= 8)
4892 pbCodeBuf[off++] = X86_OP_REX_R;
4893 }
4894 pbCodeBuf[off++] = 0x39;
4895 }
4896 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4897
4898 /* je/jz +1 */
4899 pbCodeBuf[off++] = 0x74;
4900 pbCodeBuf[off++] = 0x01;
4901
4902 /* int3 */
4903 pbCodeBuf[off++] = 0xcc;
4904
4905 /* For values smaller than the register size, we must check that the rest
4906 of the register is all zeros. */
4907 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4908 {
4909 /* test reg64, imm32 */
4910 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4911 pbCodeBuf[off++] = 0xf7;
4912 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4913 pbCodeBuf[off++] = 0;
4914 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4915 pbCodeBuf[off++] = 0xff;
4916 pbCodeBuf[off++] = 0xff;
4917
4918 /* je/jz +1 */
4919 pbCodeBuf[off++] = 0x74;
4920 pbCodeBuf[off++] = 0x01;
4921
4922 /* int3 */
4923 pbCodeBuf[off++] = 0xcc;
4924 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4925 }
4926 else
4927 {
4928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4929 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4930 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4931 }
4932
4933# elif defined(RT_ARCH_ARM64)
4934 /* mov TMP0, [gstreg] */
4935 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4936
4937 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4938 /* sub tmp0, tmp0, idxReg */
4939 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4940 /* cbz tmp0, +1 */
4941 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4942 /* brk #0x1000+enmGstReg */
4943 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4944 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4945
4946# else
4947# error "Port me!"
4948# endif
4949 return off;
4950}
4951#endif /* VBOX_STRICT */
4952
4953
4954#ifdef VBOX_STRICT
4955/**
4956 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
4957 * important bits.
4958 *
4959 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4960 * Trashes EFLAGS on AMD64.
4961 */
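/*
 * Both values are reduced to the bits relevant for TB selection: the live
 * IEMCPU::fExec is masked with IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK and
 * compared against fExec & IEMTB_F_KEY_MASK; a mismatch runs into a
 * breakpoint instruction.
 */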
4962static uint32_t
4963iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4964{
4965 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4966 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4967 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4968 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4969
4970#ifdef RT_ARCH_AMD64
4971 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4972
4973 /* je/jz +1 */
4974 pbCodeBuf[off++] = 0x74;
4975 pbCodeBuf[off++] = 0x01;
4976
4977 /* int3 */
4978 pbCodeBuf[off++] = 0xcc;
4979
4980# elif defined(RT_ARCH_ARM64)
4981 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4982
4983 /* b.eq +1 */
4984 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4985 /* brk #0x2000 */
4986 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4987
4988# else
4989# error "Port me!"
4990# endif
4991 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4992
4993 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4994 return off;
4995}
4996#endif /* VBOX_STRICT */
4997
4998
4999/**
5000 * Emits a code for checking the return code of a call and rcPassUp, returning
5001 * from the code if either are non-zero.
5002 */
5003DECL_HIDDEN_THROW(uint32_t)
5004iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5005{
5006#ifdef RT_ARCH_AMD64
5007 /*
5008 * AMD64: eax = call status code.
5009 */
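    /* The trick: OR the call status (eax) together with rcPassUp; the result
       is zero only when both are VINF_SUCCESS, so a single jnz to the
       NonZeroRetOrPassUp label covers both checks. */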
5010
5011 /* edx = rcPassUp */
5012 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5013# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5014 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5015# endif
5016
5017 /* edx = eax | rcPassUp */
5018 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5019 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5020 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5021 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5022
5023 /* Jump to non-zero status return path. */
5024 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5025
5026 /* done. */
5027
5028#elif RT_ARCH_ARM64
5029 /*
5030 * ARM64: w0 = call status code.
5031 */
5032# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5033 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5034# endif
5035 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5036
5037 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5038
5039 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5040
5041 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5042 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5043 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5044
5045#else
5046# error "port me"
5047#endif
5048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5049 return off;
5050}
5051
5052
5053/**
5054 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5055 * raising a \#GP(0) if it isn't.
5056 *
5057 * @returns New code buffer offset; throws VBox status code on error.
5058 * @param pReNative The native recompile state.
5059 * @param off The code buffer offset.
5060 * @param idxAddrReg The host register with the address to check.
5061 * @param idxInstr The current instruction.
5062 */
5063DECL_HIDDEN_THROW(uint32_t)
5064iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5065{
5066 /*
5067 * Make sure we don't have any outstanding guest register writes as we may
5068     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5069 */
5070 off = iemNativeRegFlushPendingWrites(pReNative, off);
5071
5072#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5073 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5074#else
5075 RT_NOREF(idxInstr);
5076#endif
5077
5078#ifdef RT_ARCH_AMD64
5079 /*
5080 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5081 * return raisexcpt();
5082     * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
5083 */
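    /* Worked example (illustrative values): for the canonical address
       0xffff800000000000 the high dword is 0xffff8000; adding 0x8000 wraps the
       32-bit sum to 0 and the final >> 16 gives 0, so no exception.  For the
       non-canonical 0x0000800000000000 the high dword is 0x00008000; adding
       0x8000 gives 0x00010000 and >> 16 gives 1, so we branch to RaiseGp0. */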
5084 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5085
5086 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5087 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5088 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5089 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5090 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5091
5092 iemNativeRegFreeTmp(pReNative, iTmpReg);
5093
5094#elif defined(RT_ARCH_ARM64)
5095 /*
5096 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5097 * return raisexcpt();
5098 * ----
5099 * mov x1, 0x800000000000
5100 * add x1, x0, x1
5101 * cmp xzr, x1, lsr 48
5102 * b.ne .Lraisexcpt
5103 */
5104 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5105
5106 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5107 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5108 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5109 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5110
5111 iemNativeRegFreeTmp(pReNative, iTmpReg);
5112
5113#else
5114# error "Port me"
5115#endif
5116 return off;
5117}
5118
5119
5120/**
5121 * Emits code to check if the content of @a idxAddrReg is within the limit of
5122 * idxSegReg, raising a \#GP(0) if it isn't.
5123 *
5124 * @returns New code buffer offset; throws VBox status code on error.
5125 * @param pReNative The native recompile state.
5126 * @param off The code buffer offset.
5127 * @param idxAddrReg The host register (32-bit) with the address to
5128 * check.
5129 * @param idxSegReg The segment register (X86_SREG_XXX) to check
5130 * against.
5131 * @param idxInstr The current instruction.
5132 */
5133DECL_HIDDEN_THROW(uint32_t)
5134iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5135 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
5136{
5137 /*
5138 * Make sure we don't have any outstanding guest register writes as we may
5139     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5140 */
5141 off = iemNativeRegFlushPendingWrites(pReNative, off);
5142
5143#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5144 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5145#else
5146 RT_NOREF(idxInstr);
5147#endif
5148
5149 /** @todo implement expand down/whatnot checking */
5150 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
5151
5152 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5153 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
5154 kIemNativeGstRegUse_ForUpdate);
5155
5156 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
5157 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5158
5159 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
5160 return off;
5161}
5162
5163
5164/**
5165 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5166 *
5167 * @returns The flush mask.
5168 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5169 * @param fGstShwFlush The starting flush mask.
5170 */
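/*
 * E.g. a far branch (IEM_CIMPL_F_BRANCH_FAR) adds the CS selector, base and
 * limit shadows to the flush mask, since the C implementation may reload CS;
 * likewise the stack variants add xSP and, for far stack branches, the SS
 * selector/base/limit shadows.
 */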
5171DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5172{
5173 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5174 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5175 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5176 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5177 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5178 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5179 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5180 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5181 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5182 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5183 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5184 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5185 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5186 return fGstShwFlush;
5187}
5188
5189
5190/**
5191 * Emits a call to a CImpl function or something similar.
5192 */
5193DECL_HIDDEN_THROW(uint32_t)
5194iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5195 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5196{
5197 /*
5198     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5199 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5200 */
5201 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5202 fGstShwFlush
5203 | RT_BIT_64(kIemNativeGstReg_Pc)
5204 | RT_BIT_64(kIemNativeGstReg_EFlags));
5205 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5206
5207 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5208
5209 /*
5210 * Load the parameters.
5211 */
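    /* Note: on Windows with VBOXSTRICTRC_STRICT_ENABLED the VBOXSTRICTRC
       return value travels via a hidden first parameter pointing at a stack
       slot, which shifts pVCpu and the other arguments one register to the
       right and pushes the surplus parameters onto the stack. */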
5212#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5213    /* Special-case the hidden VBOXSTRICTRC pointer. */
5214 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5215 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5216 if (cAddParams > 0)
5217 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5218 if (cAddParams > 1)
5219 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5220 if (cAddParams > 2)
5221 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5222 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5223
5224#else
5225 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5226 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5227 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5228 if (cAddParams > 0)
5229 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5230 if (cAddParams > 1)
5231 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5232 if (cAddParams > 2)
5233# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5234 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5235# else
5236 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5237# endif
5238#endif
5239
5240 /*
5241 * Make the call.
5242 */
5243 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5244
5245#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5246 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5247#endif
5248
5249 /*
5250 * Check the status code.
5251 */
5252 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5253}
5254
5255
5256/**
5257 * Emits a call to a threaded worker function.
5258 */
5259DECL_HIDDEN_THROW(uint32_t)
5260iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5261{
5262 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5263 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5264
5265#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5266 /* The threaded function may throw / long jmp, so set current instruction
5267 number if we're counting. */
5268 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5269#endif
5270
5271 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
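    /* The first argument is always pVCpu; up to three more come from the
       threaded call entry's auParams array and are mapped onto the calling
       convention's argument registers below (the Windows VBOXSTRICTRC case
       again shifts everything by one for the hidden return buffer). */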
5272
5273#ifdef RT_ARCH_AMD64
5274 /* Load the parameters and emit the call. */
5275# ifdef RT_OS_WINDOWS
5276# ifndef VBOXSTRICTRC_STRICT_ENABLED
5277 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5278 if (cParams > 0)
5279 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5280 if (cParams > 1)
5281 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5282 if (cParams > 2)
5283 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5284# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5285 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5286 if (cParams > 0)
5287 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5288 if (cParams > 1)
5289 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5290 if (cParams > 2)
5291 {
5292 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5293 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5294 }
5295 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5296# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5297# else
5298 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5299 if (cParams > 0)
5300 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5301 if (cParams > 1)
5302 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5303 if (cParams > 2)
5304 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5305# endif
5306
5307 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5308
5309# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5310 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5311# endif
5312
5313#elif RT_ARCH_ARM64
5314 /*
5315 * ARM64:
5316 */
5317 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5318 if (cParams > 0)
5319 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5320 if (cParams > 1)
5321 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5322 if (cParams > 2)
5323 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5324
5325 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5326
5327#else
5328# error "port me"
5329#endif
5330
5331 /*
5332 * Check the status code.
5333 */
5334 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5335
5336 return off;
5337}
5338
5339
5340/**
5341 * Emits the code at the CheckBranchMiss label.
5342 */
5343static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5344{
5345 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5346 if (idxLabel != UINT32_MAX)
5347 {
5348 iemNativeLabelDefine(pReNative, idxLabel, off);
5349
5350 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5351 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5352 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5353
5354 /* jump back to the return sequence. */
5355 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5356 }
5357 return off;
5358}
5359
5360
5361/**
5362 * Emits the code at the NeedCsLimChecking label.
5363 */
5364static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5365{
5366 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5367 if (idxLabel != UINT32_MAX)
5368 {
5369 iemNativeLabelDefine(pReNative, idxLabel, off);
5370
5371 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5372 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5373 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5374
5375 /* jump back to the return sequence. */
5376 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5377 }
5378 return off;
5379}
5380
5381
5382/**
5383 * Emits the code at the ObsoleteTb label.
5384 */
5385static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5386{
5387 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5388 if (idxLabel != UINT32_MAX)
5389 {
5390 iemNativeLabelDefine(pReNative, idxLabel, off);
5391
5392 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5393 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5394 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5395
5396 /* jump back to the return sequence. */
5397 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5398 }
5399 return off;
5400}
5401
5402
5403/**
5404 * Emits the code at the RaiseGP0 label.
5405 */
5406static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5407{
5408 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5409 if (idxLabel != UINT32_MAX)
5410 {
5411 iemNativeLabelDefine(pReNative, idxLabel, off);
5412
5413 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5414 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5415 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5416
5417 /* jump back to the return sequence. */
5418 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5419 }
5420 return off;
5421}
5422
5423
5424/**
5425 * Emits the code at the ReturnWithFlags label (returns
5426 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5427 */
5428static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5429{
5430 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5431 if (idxLabel != UINT32_MAX)
5432 {
5433 iemNativeLabelDefine(pReNative, idxLabel, off);
5434
5435 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5436
5437 /* jump back to the return sequence. */
5438 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5439 }
5440 return off;
5441}
5442
5443
5444/**
5445 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5446 */
5447static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5448{
5449 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5450 if (idxLabel != UINT32_MAX)
5451 {
5452 iemNativeLabelDefine(pReNative, idxLabel, off);
5453
5454 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5455
5456 /* jump back to the return sequence. */
5457 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5458 }
5459 return off;
5460}
5461
5462
5463/**
5464 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5465 */
5466static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5467{
5468 /*
5469 * Generate the rc + rcPassUp fiddling code if needed.
5470 */
5471 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5472 if (idxLabel != UINT32_MAX)
5473 {
5474 iemNativeLabelDefine(pReNative, idxLabel, off);
5475
5476 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
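        /* The register shuffling below only moves the native return value and
           pVCpu into the helper's argument registers for the respective ABI;
           the instruction number (when counted) is already in a register and
           merely needs relocating. */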
5477#ifdef RT_ARCH_AMD64
5478# ifdef RT_OS_WINDOWS
5479# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5480 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5481# endif
5482 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5483 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5484# else
5485 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5486 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5487# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5488 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5489# endif
5490# endif
5491# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5492 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5493# endif
5494
5495#else
5496 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5497 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5498 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5499#endif
5500
5501 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5502 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5503 }
5504 return off;
5505}
5506
5507
5508/**
5509 * Emits a standard epilog.
5510 */
5511static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5512{
5513 *pidxReturnLabel = UINT32_MAX;
5514
5515 /*
5516 * Successful return, so clear the return register (eax, w0).
5517 */
5518 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
5519
5520 /*
5521 * Define label for common return point.
5522 */
5523 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5524 *pidxReturnLabel = idxReturn;
5525
5526 /*
5527 * Restore registers and return.
5528 */
5529#ifdef RT_ARCH_AMD64
5530 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5531
5532    /* Reposition rsp at the r15 restore point. */
5533 pbCodeBuf[off++] = X86_OP_REX_W;
5534 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5535 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5536 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5537
5538 /* Pop non-volatile registers and return */
5539 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5540 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5541 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5542 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5543 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5544 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5545 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5546 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5547# ifdef RT_OS_WINDOWS
5548 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5549 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5550# endif
5551 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5552 pbCodeBuf[off++] = 0xc9; /* leave */
5553 pbCodeBuf[off++] = 0xc3; /* ret */
5554 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5555
5556#elif RT_ARCH_ARM64
5557 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5558
5559 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
5560 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5561 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5562 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5563 IEMNATIVE_FRAME_VAR_SIZE / 8);
5564 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5565 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5566 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5567 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5568 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5569 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5570 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5571 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5572 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5573 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5574 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5575 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5576
5577 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5578 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5579 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5580 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5581
5582 /* retab / ret */
5583# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5584 if (1)
5585 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5586 else
5587# endif
5588 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5589
5590#else
5591# error "port me"
5592#endif
5593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5594
5595 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5596}
5597
5598
5599/**
5600 * Emits a standard prolog.
5601 */
5602static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5603{
5604#ifdef RT_ARCH_AMD64
5605 /*
5606 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5607 * reserving 64 bytes for stack variables plus 4 non-register argument
5608     * slots. Fixed register assignment: xBX = pVCpu;
5609 *
5610 * Since we always do the same register spilling, we can use the same
5611 * unwind description for all the code.
5612 */
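    /* Rough frame picture after the pushes below (exact offsets are given by
       the IEMNATIVE_FRAME_* / IEMNATIVE_FP_OFF_* constants): [rbp+8] return
       address, [rbp] saved rbp, below that rbx (pVCpu), the Windows rsi/rdi
       pair when applicable, then r12-r15, and finally the variable area and
       the stack/shadow argument slots that rsp is lowered past. */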
5613 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5614 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5615 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5616 pbCodeBuf[off++] = 0x8b;
5617 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5618 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5619 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5620# ifdef RT_OS_WINDOWS
5621 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5622 pbCodeBuf[off++] = 0x8b;
5623 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5624 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5625 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5626# else
5627 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5628 pbCodeBuf[off++] = 0x8b;
5629 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5630# endif
5631 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5632 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5633 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5634 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5635 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5636 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5637 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5638 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5639
5640 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5641 X86_GREG_xSP,
5642 IEMNATIVE_FRAME_ALIGN_SIZE
5643 + IEMNATIVE_FRAME_VAR_SIZE
5644 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5645 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5646 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5647 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5648 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5649
5650#elif RT_ARCH_ARM64
5651 /*
5652 * We set up a stack frame exactly like on x86, only we have to push the
5653     * return address ourselves here. We save all non-volatile registers.
5654 */
5655 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5656
5657 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
5658                        * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
5659                        * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether
5660                        * it's in any way conditional, so we just emit this instruction now and hope for the best... */
5661 /* pacibsp */
5662 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5663# endif
5664
5665 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5666 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5667 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5668 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5669 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5670 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5671 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5672 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5673 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5674 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5675 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5676 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5677 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5678 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5679 /* Save the BP and LR (ret address) registers at the top of the frame. */
5680 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5681 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5682 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5683 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5684 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5685 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5686
5687 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5688 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5689
5690 /* mov r28, r0 */
5691 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
5692 /* mov r27, r1 */
5693 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
5694
5695#else
5696# error "port me"
5697#endif
5698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5699 return off;
5700}
5701
5702
5703
5704
5705/*********************************************************************************************************************************
5706* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
5707*********************************************************************************************************************************/
5708
5709#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
5710 { \
5711 Assert(pReNative->Core.bmVars == 0); \
5712 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
5713 Assert(pReNative->Core.bmStack == 0); \
5714 pReNative->fMc = (a_fMcFlags); \
5715 pReNative->fCImpl = (a_fCImplFlags); \
5716 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
5717
5718/** We have to get to the end in recompilation mode, as otherwise we won't
5719 * generate code for all the IEM_MC_IF_XXX branches. */
5720#define IEM_MC_END() \
5721 iemNativeVarFreeAll(pReNative); \
5722 } return off
5723
5724
5725
5726/*********************************************************************************************************************************
5727* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
5728*********************************************************************************************************************************/
5729
5730#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
5731 pReNative->fMc = 0; \
5732 pReNative->fCImpl = (a_fFlags); \
5733 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
5734
5735
5736#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5737 pReNative->fMc = 0; \
5738 pReNative->fCImpl = (a_fFlags); \
5739 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
5740
5741DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5742 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5743 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
5744{
5745 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
5746}
5747
5748
5749#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5750 pReNative->fMc = 0; \
5751 pReNative->fCImpl = (a_fFlags); \
5752 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5753 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
5754
5755DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5756 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5757 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
5758{
5759 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
5760}
5761
5762
5763#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5764 pReNative->fMc = 0; \
5765 pReNative->fCImpl = (a_fFlags); \
5766 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5767 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
5768
5769DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5770 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5771 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
5772 uint64_t uArg2)
5773{
5774 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
5775}
5776
5777
5778
5779/*********************************************************************************************************************************
5780* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5781*********************************************************************************************************************************/
5782
5783/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5784 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5785DECL_INLINE_THROW(uint32_t)
5786iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5787{
5788 /*
5789     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5790     * return with a special status code and make the execution loop deal with
5791     * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5792     * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5793     * could continue w/o interruption, it would probably drop into the
5794     * debugger, so it's not worth the effort of trying to service it here and
5795     * we just lump it in with the handling of the others.
5796     *
5797     * To simplify the code and the register state management even more (wrt the
5798     * immediate in the AND operation), we always update the flags and skip the
5799     * extra check and the conditional jump it would require.
5800 */
5801 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5802 <= UINT32_MAX);
5803 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5804 kIemNativeGstRegUse_ForUpdate);
5805 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5806 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5807 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5808 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5809 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5810
5811 /* Free but don't flush the EFLAGS register. */
5812 iemNativeRegFreeTmp(pReNative, idxEflReg);
5813
5814 return off;
5815}
5816
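/*
 * Illustrative sketch (not compiled): the check the code emitted above performs at
 * runtime, written as plain C over the guest EFLAGS value.  The function and
 * parameter names are made up.
 */
#if 0
static void exampleFinishInstructionFlagsCheck(uint32_t *pfEfl, bool *pfReturnWithFlags)
{
    if (*pfEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
    {
        *pfReturnWithFlags = true;  /* emitted code: jump to the ReturnWithFlags label and leave the TB */
        return;
    }
    *pfReturnWithFlags = false;
    /* Always clear RF and the interrupt shadow, then store the result back to CPUMCTX. */
    *pfEfl &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
}
#endif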
5817
5818#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
5819 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5820
5821#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
5822 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
5823 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5824
5825/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5826DECL_INLINE_THROW(uint32_t)
5827iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5828{
5829 /* Allocate a temporary PC register. */
5830 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5831
5832 /* Perform the addition and store the result. */
5833 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5834 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5835
5836 /* Free but don't flush the PC register. */
5837 iemNativeRegFreeTmp(pReNative, idxPcReg);
5838
5839 return off;
5840}
5841
5842
5843#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
5844 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5845
5846#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
5847 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
5848 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5849
5850/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5851DECL_INLINE_THROW(uint32_t)
5852iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5853{
5854 /* Allocate a temporary PC register. */
5855 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5856
5857 /* Perform the addition and store the result. */
5858 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5859 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5860
5861 /* Free but don't flush the PC register. */
5862 iemNativeRegFreeTmp(pReNative, idxPcReg);
5863
5864 return off;
5865}
5866
5867
5868#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
5869 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5870
5871#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
5872 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
5873 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5874
5875/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5876DECL_INLINE_THROW(uint32_t)
5877iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5878{
5879 /* Allocate a temporary PC register. */
5880 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5881
5882 /* Perform the addition and store the result. */
5883 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5884 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5885 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5886
5887 /* Free but don't flush the PC register. */
5888 iemNativeRegFreeTmp(pReNative, idxPcReg);
5889
5890 return off;
5891}
5892
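/*
 * Illustrative sketch (not compiled): the guest-visible arithmetic the three
 * PC-advance emitters above generate code for.  Names are made up.
 */
#if 0
static uint64_t exampleAdvancePc(uint64_t uOldPc, uint8_t cbInstr, unsigned cBitsPcMode)
{
    if (cBitsPcMode == 64)
        return uOldPc + cbInstr;                    /* rip: full 64-bit add */
    if (cBitsPcMode == 32)
        return (uint32_t)uOldPc + cbInstr;          /* eip: 32-bit add, upper half implicitly zero */
    return (uint16_t)((uint32_t)uOldPc + cbInstr);  /* ip:  32-bit add, then clear bits 16 and up */
}
#endif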
5893
5894
5895/*********************************************************************************************************************************
5896* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5897*********************************************************************************************************************************/
5898
5899#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
5900 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5901 (a_enmEffOpSize), pCallEntry->idxInstr)
5902
5903#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5904 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
5905 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5906
5907#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
5908 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5909 IEMMODE_16BIT, pCallEntry->idxInstr)
5910
5911#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
5912 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
5913 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5914
5915#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
5916 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5917 IEMMODE_64BIT, pCallEntry->idxInstr)
5918
5919#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
5920 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
5921 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5922
5923/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5924 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5925 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5926DECL_INLINE_THROW(uint32_t)
5927iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5928 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5929{
5930 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
5931
5932 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5933 off = iemNativeRegFlushPendingWrites(pReNative, off);
5934
5935 /* Allocate a temporary PC register. */
5936 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5937
5938 /* Perform the addition. */
5939 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5940
5941 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5942 {
5943 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5944 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5945 }
5946 else
5947 {
5948 /* Just truncate the result to 16-bit IP. */
5949 Assert(enmEffOpSize == IEMMODE_16BIT);
5950 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5951 }
5952 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5953
5954 /* Free but don't flush the PC register. */
5955 iemNativeRegFreeTmp(pReNative, idxPcReg);
5956
5957 return off;
5958}
5959
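/*
 * Illustrative sketch (not compiled): the runtime calculation behind the 64-bit
 * relative jump emitter above.  Uses X86_IS_CANONICAL from iprt/x86.h; the function
 * and parameter names are made up.
 */
#if 0
static uint64_t exampleRip64RelativeJump(uint64_t uOldRip, int32_t offDisp, uint8_t cbInstr,
                                         IEMMODE enmEffOpSize, bool *pfRaiseGp0)
{
    uint64_t uNewRip = uOldRip + (int64_t)offDisp + cbInstr;
    if (enmEffOpSize == IEMMODE_64BIT)
        *pfRaiseGp0 = !X86_IS_CANONICAL(uNewRip);   /* non-canonical -> #GP(0) and exit the TB */
    else
    {
        uNewRip &= UINT16_MAX;                      /* 16-bit operand size: truncate to IP */
        *pfRaiseGp0 = false;
    }
    return uNewRip;
}
#endif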
5960
5961#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
5962 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5963 (a_enmEffOpSize), pCallEntry->idxInstr)
5964
5965#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5966 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
5967 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5968
5969#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
5970 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5971 IEMMODE_16BIT, pCallEntry->idxInstr)
5972
5973#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
5974 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
5975 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5976
5977#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
5978 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5979 IEMMODE_32BIT, pCallEntry->idxInstr)
5980
5981#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
5982 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
5983 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5984
5985/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
5986 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
5987 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
5988DECL_INLINE_THROW(uint32_t)
5989iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5990 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5991{
5992 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
5993
5994 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5995 off = iemNativeRegFlushPendingWrites(pReNative, off);
5996
5997 /* Allocate a temporary PC register. */
5998 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5999
6000 /* Perform the addition. */
6001 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6002
6003 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6004 if (enmEffOpSize == IEMMODE_16BIT)
6005 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6006
6007 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6008 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6009
6010 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6011
6012 /* Free but don't flush the PC register. */
6013 iemNativeRegFreeTmp(pReNative, idxPcReg);
6014
6015 return off;
6016}
6017
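/*
 * Illustrative sketch (not compiled): the runtime calculation behind the 32-bit/16-bit
 * relative jump emitter above, including the CS limit check it emits (assuming the
 * check is 'new EIP above the limit -> #GP(0)').  Names are made up.
 */
#if 0
static uint32_t exampleEip32RelativeJump(uint32_t uOldEip, int32_t offDisp, uint8_t cbInstr,
                                         IEMMODE enmEffOpSize, uint32_t cbCsLimit, bool *pfRaiseGp0)
{
    uint32_t uNewEip = uOldEip + offDisp + cbInstr; /* 32-bit wrap-around semantics */
    if (enmEffOpSize == IEMMODE_16BIT)
        uNewEip &= UINT16_MAX;                      /* truncate to IP */
    *pfRaiseGp0 = uNewEip > cbCsLimit;              /* beyond the CS limit -> #GP(0) and exit the TB */
    return uNewEip;
}
#endif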
6018
6019#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
6020 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
6021
6022#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
6023 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
6024 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6025
6026#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
6027 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
6028
6029#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
6030 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
6031 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6032
6033#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
6034 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
6035
6036#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
6037 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
6038 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6039
6040/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6041DECL_INLINE_THROW(uint32_t)
6042iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6043 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6044{
6045 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6046 off = iemNativeRegFlushPendingWrites(pReNative, off);
6047
6048 /* Allocate a temporary PC register. */
6049 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6050
6051 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6052 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6053 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6054 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6055 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6056
6057 /* Free but don't flush the PC register. */
6058 iemNativeRegFreeTmp(pReNative, idxPcReg);
6059
6060 return off;
6061}
6062
6063
6064
6065/*********************************************************************************************************************************
6066* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                 *
6067*********************************************************************************************************************************/
6068
6069/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6070#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6071 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6072
6073/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6074#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6075 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6076
6077/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6078#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6079 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6080
6081/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6082 * clears flags. */
6083#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6084 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6085 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6086
6087/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6088 * clears flags. */
6089#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6090 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6091 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6092
6093/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6094 * clears flags. */
6095#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6096 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6098
6099#undef IEM_MC_SET_RIP_U16_AND_FINISH
6100
6101
6102/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6103#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6104 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6105
6106/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6107#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6108 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6109
6110/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6111 * clears flags. */
6112#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6113 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6114 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6115
6116/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6117 * and clears flags. */
6118#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6119 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6120 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6121
6122#undef IEM_MC_SET_RIP_U32_AND_FINISH
6123
6124
6125/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6126#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6127 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6128
6129/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6130 * and clears flags. */
6131#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6132 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6133 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6134
6135#undef IEM_MC_SET_RIP_U64_AND_FINISH
6136
6137
6138/** Same as iemRegRipJumpU16AndFinishNoFlags,
6139 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6140DECL_INLINE_THROW(uint32_t)
6141iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6142 uint8_t idxInstr, uint8_t cbVar)
6143{
6144 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6145 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
6146
6147 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6148 off = iemNativeRegFlushPendingWrites(pReNative, off);
6149
6150 /* Get a register with the new PC loaded from idxVarPc.
6151       Note! This ASSUMES that the high bits of the GPR are zeroed. */
6152 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6153
6154 /* Check limit (may #GP(0) + exit TB). */
6155 if (!f64Bit)
6156 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6157 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6158 else if (cbVar > sizeof(uint32_t))
6159 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6160
6161 /* Store the result. */
6162 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6163
6164 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6165    /** @todo implicitly free the variable? */
6166
6167 return off;
6168}
6169
6170
6171
6172/*********************************************************************************************************************************
6173* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
6174*********************************************************************************************************************************/
6175
6176/**
6177 * Pushes an IEM_MC_IF_XXX onto the condition stack.
6178 *
6179 * @returns Pointer to the condition stack entry on success; throws (longjmps)
6180 *          VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting gets too deep.
6181 */
6182DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
6183{
6184 uint32_t const idxStack = pReNative->cCondDepth;
6185 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
6186
6187 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
6188 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
6189
6190 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
6191 pEntry->fInElse = false;
6192 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
6193 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
6194
6195 return pEntry;
6196}
6197
6198
6199/**
6200 * Start of the if-block, snapshotting the register and variable state.
6201 */
6202DECL_INLINE_THROW(void)
6203iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
6204{
6205 Assert(offIfBlock != UINT32_MAX);
6206 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6207 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6208 Assert(!pEntry->fInElse);
6209
6210    /* Define the start of the IF block if requested or for disassembly purposes. */
6211 if (idxLabelIf != UINT32_MAX)
6212 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
6213#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6214 else
6215 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
6216#else
6217 RT_NOREF(offIfBlock);
6218#endif
6219
6220 /* Copy the initial state so we can restore it in the 'else' block. */
6221 pEntry->InitialState = pReNative->Core;
6222}
6223
6224
6225#define IEM_MC_ELSE() } while (0); \
6226 off = iemNativeEmitElse(pReNative, off); \
6227 do {
6228
6229/** Emits code related to IEM_MC_ELSE. */
6230DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6231{
6232 /* Check sanity and get the conditional stack entry. */
6233 Assert(off != UINT32_MAX);
6234 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6235 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6236 Assert(!pEntry->fInElse);
6237
6238 /* Jump to the endif */
6239 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
6240
6241 /* Define the else label and enter the else part of the condition. */
6242 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6243 pEntry->fInElse = true;
6244
6245 /* Snapshot the core state so we can do a merge at the endif and restore
6246 the snapshot we took at the start of the if-block. */
6247 pEntry->IfFinalState = pReNative->Core;
6248 pReNative->Core = pEntry->InitialState;
6249
6250 return off;
6251}
6252
6253
6254#define IEM_MC_ENDIF() } while (0); \
6255 off = iemNativeEmitEndIf(pReNative, off)
6256
6257/** Emits code related to IEM_MC_ENDIF. */
6258DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6259{
6260 /* Check sanity and get the conditional stack entry. */
6261 Assert(off != UINT32_MAX);
6262 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6263 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6264
6265 /*
6266     * Now we have to find common ground with the core state at the end of the
6267     * other branch. Use the smallest common denominator and just drop anything
6268     * that isn't the same in both states.
6269 */
6270 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
6271 * which is why we're doing this at the end of the else-block.
6272     * But we'd need more info about the future for that to be worth the effort. */
6273 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
6274 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
6275 {
6276 /* shadow guest stuff first. */
6277 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
6278 if (fGstRegs)
6279 {
6280 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
6281 do
6282 {
6283 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
6284 fGstRegs &= ~RT_BIT_64(idxGstReg);
6285
6286 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6287 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
6288 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
6289 {
6290 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
6291 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
6292 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
6293 }
6294 } while (fGstRegs);
6295 }
6296 else
6297 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
6298
6299 /* Check variables next. For now we must require them to be identical
6300 or stuff we can recreate. */
6301 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
6302 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
6303 if (fVars)
6304 {
6305 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
6306 do
6307 {
6308 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
6309 fVars &= ~RT_BIT_32(idxVar);
6310
6311 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
6312 {
6313 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
6314 continue;
6315 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6316 {
6317 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6318 if (idxHstReg != UINT8_MAX)
6319 {
6320 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6321 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6322 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
6323 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6324 }
6325 continue;
6326 }
6327 }
6328 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
6329 continue;
6330
6331 /* Irreconcilable, so drop it. */
6332 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6333 if (idxHstReg != UINT8_MAX)
6334 {
6335 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6336 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6337 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
6338 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6339 }
6340 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
6341 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6342 } while (fVars);
6343 }
6344
6345        /* Finally, check that the host register allocations match. */
6346 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
6347 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
6348 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
6349 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
6350 }
6351
6352 /*
6353 * Define the endif label and maybe the else one if we're still in the 'if' part.
6354 */
6355 if (!pEntry->fInElse)
6356 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6357 else
6358 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
6359 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
6360
6361    /* Pop the condition stack. */
6362 pReNative->cCondDepth -= 1;
6363
6364 return off;
6365}
6366
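/*
 * Illustrative sketch (not compiled): how a conditional MC block expands via the
 * IEM_MC_ELSE / IEM_MC_ENDIF macros above and the IEM_MC_IF_XXX macros below.  The
 * statement placeholders are made up; the expansion lives inside a recompiler
 * function body.
 */
#if 0
/* IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) ... IEM_MC_ELSE() ... IEM_MC_ENDIF(); becomes: */
off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); /* push cond stack entry, test + jump to the 'else' label */
do {
    /* if-block statements */
} while (0);
off = iemNativeEmitElse(pReNative, off);    /* jmp 'endif'; define the 'else' label; restore the initial core state */
do {
    /* else-block statements */
} while (0);
off = iemNativeEmitEndIf(pReNative, off);   /* reconcile the two states; define the 'endif' label; pop the cond stack */
#endif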
6367
6368#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
6369 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
6370 do {
6371
6372/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
6373DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6374{
6375 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6376
6377 /* Get the eflags. */
6378 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6379 kIemNativeGstRegUse_ReadOnly);
6380
6381 /* Test and jump. */
6382 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6383
6384 /* Free but don't flush the EFlags register. */
6385 iemNativeRegFreeTmp(pReNative, idxEflReg);
6386
6387 /* Make a copy of the core state now as we start the if-block. */
6388 iemNativeCondStartIfBlock(pReNative, off);
6389
6390 return off;
6391}
6392
6393
6394#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
6395 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
6396 do {
6397
6398/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
6399DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6400{
6401 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6402
6403 /* Get the eflags. */
6404 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6405 kIemNativeGstRegUse_ReadOnly);
6406
6407 /* Test and jump. */
6408 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6409
6410 /* Free but don't flush the EFlags register. */
6411 iemNativeRegFreeTmp(pReNative, idxEflReg);
6412
6413 /* Make a copy of the core state now as we start the if-block. */
6414 iemNativeCondStartIfBlock(pReNative, off);
6415
6416 return off;
6417}
6418
6419
6420#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
6421 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
6422 do {
6423
6424/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
6425DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6426{
6427 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6428
6429 /* Get the eflags. */
6430 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6431 kIemNativeGstRegUse_ReadOnly);
6432
6433 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6434 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6435
6436 /* Test and jump. */
6437 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6438
6439 /* Free but don't flush the EFlags register. */
6440 iemNativeRegFreeTmp(pReNative, idxEflReg);
6441
6442 /* Make a copy of the core state now as we start the if-block. */
6443 iemNativeCondStartIfBlock(pReNative, off);
6444
6445 return off;
6446}
6447
6448
6449#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
6450 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
6451 do {
6452
6453/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
6454DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6455{
6456 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6457
6458 /* Get the eflags. */
6459 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6460 kIemNativeGstRegUse_ReadOnly);
6461
6462 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6463 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6464
6465 /* Test and jump. */
6466 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6467
6468 /* Free but don't flush the EFlags register. */
6469 iemNativeRegFreeTmp(pReNative, idxEflReg);
6470
6471 /* Make a copy of the core state now as we start the if-block. */
6472 iemNativeCondStartIfBlock(pReNative, off);
6473
6474 return off;
6475}
6476
6477
6478#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6479 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6480 do {
6481
6482#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6483 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6484 do {
6485
6486/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6487DECL_INLINE_THROW(uint32_t)
6488iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6489 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6490{
6491 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6492
6493 /* Get the eflags. */
6494 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6495 kIemNativeGstRegUse_ReadOnly);
6496
6497 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6498 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6499
6500 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6501 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6502 Assert(iBitNo1 != iBitNo2);
6503
6504#ifdef RT_ARCH_AMD64
6505 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6506
6507 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6508 if (iBitNo1 > iBitNo2)
6509 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6510 else
6511 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6512 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6513
6514#elif defined(RT_ARCH_ARM64)
6515 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6516 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6517
6518 /* and tmpreg, eflreg, #1<<iBitNo1 */
6519 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6520
6521 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6522 if (iBitNo1 > iBitNo2)
6523 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6524 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6525 else
6526 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6527 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6528
6529 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6530
6531#else
6532# error "Port me"
6533#endif
6534
6535 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6536 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6537 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6538
6539 /* Free but don't flush the EFlags and tmp registers. */
6540 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6541 iemNativeRegFreeTmp(pReNative, idxEflReg);
6542
6543 /* Make a copy of the core state now as we start the if-block. */
6544 iemNativeCondStartIfBlock(pReNative, off);
6545
6546 return off;
6547}
6548
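/*
 * Illustrative sketch (not compiled): scalar equivalent of the AND/shift/XOR sequence
 * emitted above, showing that bit 'iBitNo2' of the temporary ends up set exactly when
 * the two EFLAGS bits differ.  The function name is made up.
 */
#if 0
static bool exampleEflagsTwoBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
{
    uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);      /* isolate bit #1 */
    if (iBitNo1 > iBitNo2)
        uTmp >>= iBitNo1 - iBitNo2;                 /* move it into bit #2's position */
    else
        uTmp <<= iBitNo2 - iBitNo1;
    uTmp ^= fEfl;                                   /* bit #2 of the result = bit #1 XOR bit #2 */
    return RT_BOOL(uTmp & RT_BIT_32(iBitNo2));      /* set iff the two bits were different */
}
#endif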
6549
6550#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6551 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6552 do {
6553
6554#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6555 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6556 do {
6557
6558/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6559 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6560DECL_INLINE_THROW(uint32_t)
6561iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6562 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6563{
6564 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6565
6566 /* We need an if-block label for the non-inverted variant. */
6567 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6568 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6569
6570 /* Get the eflags. */
6571 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6572 kIemNativeGstRegUse_ReadOnly);
6573
6574 /* Translate the flag masks to bit numbers. */
6575 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6576 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6577
6578 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6579 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6580 Assert(iBitNo1 != iBitNo);
6581
6582 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6583 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6584 Assert(iBitNo2 != iBitNo);
6585 Assert(iBitNo2 != iBitNo1);
6586
6587#ifdef RT_ARCH_AMD64
6588 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6589#elif defined(RT_ARCH_ARM64)
6590 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6591#endif
6592
6593 /* Check for the lone bit first. */
6594 if (!fInverted)
6595 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6596 else
6597 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6598
6599 /* Then extract and compare the other two bits. */
6600#ifdef RT_ARCH_AMD64
6601 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6602 if (iBitNo1 > iBitNo2)
6603 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6604 else
6605 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6606 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6607
6608#elif defined(RT_ARCH_ARM64)
6609 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6610
6611 /* and tmpreg, eflreg, #1<<iBitNo1 */
6612 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6613
6614 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6615 if (iBitNo1 > iBitNo2)
6616 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6617 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6618 else
6619 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6620 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6621
6622 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6623
6624#else
6625# error "Port me"
6626#endif
6627
6628 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6629 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6630 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6631
6632 /* Free but don't flush the EFlags and tmp registers. */
6633 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6634 iemNativeRegFreeTmp(pReNative, idxEflReg);
6635
6636 /* Make a copy of the core state now as we start the if-block. */
6637 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
6638
6639 return off;
6640}
6641
6642
6643#define IEM_MC_IF_CX_IS_NZ() \
6644 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
6645 do {
6646
6647/** Emits code for IEM_MC_IF_CX_IS_NZ. */
6648DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6649{
6650 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6651
6652 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6653 kIemNativeGstRegUse_ReadOnly);
6654 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6655 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6656
6657 iemNativeCondStartIfBlock(pReNative, off);
6658 return off;
6659}
6660
6661
6662#define IEM_MC_IF_ECX_IS_NZ() \
6663 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
6664 do {
6665
6666#define IEM_MC_IF_RCX_IS_NZ() \
6667 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
6668 do {
6669
6670/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
6671DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6672{
6673 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6674
6675 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6676 kIemNativeGstRegUse_ReadOnly);
6677 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6678 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6679
6680 iemNativeCondStartIfBlock(pReNative, off);
6681 return off;
6682}
6683
6684
6685#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6686 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
6687 do {
6688
6689#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6690 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
6691 do {
6692
6693/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6694DECL_INLINE_THROW(uint32_t)
6695iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
6696{
6697 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6698
6699 /* We have to load both RCX and EFLAGS before we can start branching,
6700 otherwise we'll end up in the else-block with an inconsistent
6701 register allocator state.
6702 Doing EFLAGS first as it's more likely to be loaded, right? */
6703 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6704 kIemNativeGstRegUse_ReadOnly);
6705 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6706 kIemNativeGstRegUse_ReadOnly);
6707
6708 /** @todo we could reduce this to a single branch instruction by spending a
6709 * temporary register and some setnz stuff. Not sure if loops are
6710 * worth it. */
6711 /* Check CX. */
6712 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6713
6714 /* Check the EFlags bit. */
6715 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6716 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6717 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6718 !fCheckIfSet /*fJmpIfSet*/);
6719
6720 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6721 iemNativeRegFreeTmp(pReNative, idxEflReg);
6722
6723 iemNativeCondStartIfBlock(pReNative, off);
6724 return off;
6725}
6726
6727
6728#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6729 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
6730 do {
6731
6732#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6733 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
6734 do {
6735
6736#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6737 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
6738 do {
6739
6740#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6741 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
6742 do {
6743
6744/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
6745 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
6746 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
6747 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6748DECL_INLINE_THROW(uint32_t)
6749iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6750 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
6751{
6752 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6753
6754 /* We have to load both RCX and EFLAGS before we can start branching,
6755 otherwise we'll end up in the else-block with an inconsistent
6756 register allocator state.
6757 Doing EFLAGS first as it's more likely to be loaded, right? */
6758 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6759 kIemNativeGstRegUse_ReadOnly);
6760 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6761 kIemNativeGstRegUse_ReadOnly);
6762
6763 /** @todo we could reduce this to a single branch instruction by spending a
6764 * temporary register and some setnz stuff. Not sure if loops are
6765 * worth it. */
6766 /* Check RCX/ECX. */
6767 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6768
6769 /* Check the EFlags bit. */
6770 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6771 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6772 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6773 !fCheckIfSet /*fJmpIfSet*/);
6774
6775 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6776 iemNativeRegFreeTmp(pReNative, idxEflReg);
6777
6778 iemNativeCondStartIfBlock(pReNative, off);
6779 return off;
6780}
6781
6782
6783
6784/*********************************************************************************************************************************
6785* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6786*********************************************************************************************************************************/
6787/** Number of hidden arguments for CIMPL calls.
6788 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
6789#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6790# define IEM_CIMPL_HIDDEN_ARGS 3
6791#else
6792# define IEM_CIMPL_HIDDEN_ARGS 2
6793#endif
6794
6795#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
6796 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
6797
6798#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
6799 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
6800
6801#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
6802 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
6803
6804#define IEM_MC_LOCAL(a_Type, a_Name) \
6805 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
6806
6807#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
6808 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
6809
6810
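/*
 * Illustrative sketch (not compiled): in the recompiler, MC arguments and locals are
 * not C variables but uint8_t indices into pReNative->Core.aVars.  The names below
 * are made-up examples of how the macros above expand.
 */
#if 0
/* IEM_MC_ARG(uint16_t, u16Src, 1)         -> */ uint8_t const u16Src = iemNativeArgAlloc(pReNative, 1, sizeof(uint16_t));
/* IEM_MC_LOCAL(uint32_t, u32Dst)          -> */ uint8_t const u32Dst = iemNativeVarAlloc(pReNative, sizeof(uint32_t));
/* IEM_MC_LOCAL_CONST(uint8_t, bImm, 0x3f) -> */ uint8_t const bImm   = iemNativeVarAllocConst(pReNative, sizeof(uint8_t), 0x3f);
#endif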
6811/**
6812 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
6813 */
6814DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
6815{
6816 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
6817 return IEM_CIMPL_HIDDEN_ARGS;
6818 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
6819 return 1;
6820 return 0;
6821}
6822
6823
6824/**
6825 * Internal work that allocates a variable with kind set to
6826 * kIemNativeVarKind_Invalid and no current stack allocation.
6827 *
6828 * The kind will either be set by the caller or later when the variable is first
6829 * assigned a value.
6830 */
6831static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6832{
6833 Assert(cbType > 0 && cbType <= 64);
6834 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6835 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6836 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6837 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6838 pReNative->Core.aVars[idxVar].cbVar = cbType;
6839 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6840 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6841 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6842 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6843 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6844 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6845 pReNative->Core.aVars[idxVar].u.uValue = 0;
6846 return idxVar;
6847}
6848
6849
6850/**
6851 * Internal work that allocates an argument variable w/o setting enmKind.
6852 */
6853static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6854{
6855 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6856 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6857 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6858
6859 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6860 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
6861 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6862 return idxVar;
6863}
6864
6865
6866/**
6867 * Gets the stack slot for a stack variable, allocating one if necessary.
6868 *
6869 * Calling this function implies that the stack slot will contain a valid
6870 * variable value. The caller deals with any register currently assigned to the
6871 * variable, typically by spilling it into the stack slot.
6872 *
6873 * @returns The stack slot number.
6874 * @param pReNative The recompiler state.
6875 * @param idxVar The variable.
6876 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6877 */
6878DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6879{
6880 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6881 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6882
6883 /* Already got a slot? */
6884 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6885 if (idxStackSlot != UINT8_MAX)
6886 {
6887 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6888 return idxStackSlot;
6889 }
6890
6891 /*
6892 * A single slot is easy to allocate.
6893 * Allocate them from the top end, closest to BP, to reduce the displacement.
6894 */
6895 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6896 {
6897 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6898 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6899 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6900 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6901        Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6902 return (uint8_t)iSlot;
6903 }
6904
6905 /*
6906 * We need more than one stack slot.
6907 *
6908 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6909 */
6910 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6911 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6912 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6913 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
6914 uint32_t bmStack = ~pReNative->Core.bmStack;
6915 while (bmStack != UINT32_MAX)
6916 {
6917/** @todo allocate from the top to reduce BP displacement. */
6918 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6919 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6920 if (!(iSlot & fBitAlignMask))
6921 {
6922 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6923 {
6924 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6925 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6926                Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
6927 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
6928 return (uint8_t)iSlot;
6929 }
6930 }
6931 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6932 }
6933 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6934}
6935
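/*
 * Illustrative sketch (not compiled): the alignment and allocation masks computed by
 * iemNativeVarGetStackSlot above for variables needing more than one 8-byte slot.
 * The table values follow directly from the formulas in that function.
 */
#if 0
static void exampleVarStackSlotMasks(void)
{
    static const struct { uint8_t cbVar; uint32_t fAlignMask; uint32_t fAllocMask; } s_aExamples[] =
    {
        { 16, 0x1, 0x03 }, /* 2 slots, allocated on an even slot index */
        { 32, 0x3, 0x0f }, /* 4 slots, slot index a multiple of 4      */
        { 64, 0x7, 0xff }, /* 8 slots, slot index a multiple of 8      */
    };
    for (unsigned i = 0; i < RT_ELEMENTS(s_aExamples); i++)
    {
        uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(s_aExamples[i].cbVar) - 4) - 1;
        uint32_t const fBitAllocMask = RT_BIT_32((s_aExamples[i].cbVar + 7) >> 3) - 1;
        Assert(fBitAlignMask == s_aExamples[i].fAlignMask);
        Assert(fBitAllocMask == s_aExamples[i].fAllocMask);
    }
}
#endif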
6936
6937/**
6938 * Changes the variable to a stack variable.
6939 *
6940 * Currently this is only possible to do the first time the variable is used;
6941 * switching later could be implemented but isn't done yet.
6942 *
6943 * @param pReNative The recompiler state.
6944 * @param idxVar The variable.
6945 * @throws VERR_IEM_VAR_IPE_2
6946 */
6947static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6948{
6949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6950 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6951 {
6952 /* We could in theory transition from immediate to stack as well, but it
6953 would involve the caller doing work storing the value on the stack. So,
6954 till that's required we only allow transition from invalid. */
6955 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6956 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6957 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6958 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
6959
6960 /* Note! We don't allocate a stack slot here, that's only done when a
6961 slot is actually needed to hold a variable value. */
6962 }
6963}
6964
6965
6966/**
6967 * Sets the variable to a constant (immediate) value.
6968 *
6969 * This does not require stack storage as we know the value and can always
6970 * reload it, unless of course it's referenced.
6971 *
6972 * @param pReNative The recompiler state.
6973 * @param idxVar The variable.
6974 * @param uValue The immediate value.
6975 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6976 */
6977static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6978{
6979 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6980 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
6981 {
6982 /* Only simple transitions for now. */
6983 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6984 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6985 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
6986 }
6987 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6988
6989 pReNative->Core.aVars[idxVar].u.uValue = uValue;
6990 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
6991 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
6992 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
6993}
6994
6995
6996/**
6997 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6998 *
6999 * This does not require stack storage as we know the value and can always
7000 * reload it. Loading is postponed till needed.
7001 *
7002 * @param pReNative The recompiler state.
7003 * @param idxVar The variable.
7004 * @param idxOtherVar The variable to take the (stack) address of.
7005 *
7006 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7007 */
7008static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7009{
7010 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7011 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7012
7013 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7014 {
7015 /* Only simple transitions for now. */
7016 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7017 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7018 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7019 }
7020 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7021
7022 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
7023
7024 /* Update the other variable, ensure it's a stack variable. */
7025 /** @todo handle variables with const values... that'll go boom now. */
7026 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7027 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
7028}
7029
7030
7031/**
7032 * Sets the variable to a reference (pointer) to a guest register reference.
7033 *
7034 * This does not require stack storage as we know the value and can always
7035 * reload it. Loading is postponed till needed.
7036 *
7037 * @param pReNative The recompiler state.
7038 * @param idxVar The variable.
7039 * @param enmRegClass The class of guest registers to reference.
7040 * @param idxReg The register within @a enmRegClass to reference.
7041 *
7042 * @throws VERR_IEM_VAR_IPE_2
7043 */
7044static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7045 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7046{
7047 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7048
7049 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
7050 {
7051 /* Only simple transitions for now. */
7052 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7053 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7054 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
7055 }
7056 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7057
7058 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
7059 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
7060}
7061
7062
7063DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7064{
7065 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7066}
7067
7068
7069DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7070{
7071 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7072
7073 /* Since we're using a generic uint64_t value type, we must truncate it if
7074 the variable is smaller, otherwise we may end up with a too large value when
7075 scaling up an imm8 w/ sign-extension.
7076
7077 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7078 in the bios, bx=1) when running on arm, because clang expects 16-bit
7079 register parameters to have bits 16 and up set to zero. Instead of
7080 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7081 CF value in the result. */
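    /* Illustrative example (the values are assumptions, not taken from the
       comment above): with cbType == sizeof(uint16_t) and uValue coming in as
       UINT64_C(0xffffffffffffffff) (an imm8 of -1 after sign extension), the
       switch below masks it down to UINT64_C(0xffff) before it is recorded. */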
7082 switch (cbType)
7083 {
7084 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7085 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7086 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7087 }
7088 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7089 return idxVar;
7090}
7091
7092
7093DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7094{
7095 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7096 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7097 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7098 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7099
7100 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7101 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
7102 return idxArgVar;
7103}
7104
7105
7106DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7107{
7108 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7109 /* Don't set to stack now; leave that to the first use, as for instance
7110 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7111 return idxVar;
7112}
7113
7114
7115DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7116{
7117 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7118
7119 /* Since we're using a generic uint64_t value type, we must truncate it if
7120 the variable is smaller, otherwise we may end up with a too large value when
7121 scaling up an imm8 w/ sign-extension. */
7122 switch (cbType)
7123 {
7124 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7125 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7126 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7127 }
7128 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7129 return idxVar;
7130}
7131
7132
7133/**
7134 * Releases the variable's register.
7135 *
7136 * The register must have been previously acquired calling
7137 * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
7138 * iemNativeVarRegisterSetAndAcquire().
7139 */
7140DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7141{
7142 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7143 Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
7144 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7145}
7146
7147
7148/**
7149 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7150 * fixed till we call iemNativeVarRegisterRelease.
7151 *
7152 * @returns The host register number.
7153 * @param pReNative The recompiler state.
7154 * @param idxVar The variable.
7155 * @param poff Pointer to the instruction buffer offset.
7156 * In case a register needs to be freed up or the value
7157 * Used in case a register needs to be freed up or the
7158 * value needs to be loaded off the stack.
7159 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7160 * the case.
7161 * @param idxRegPref Preferred register number or UINT8_MAX.
7162 */
7163DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7164 bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX)
7165{
7166 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7167 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
7168 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7169
7170 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7171 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7172 {
7173 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
7174 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7175 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7176 return idxReg;
7177 }
7178
7179 /*
7180 * If the kind of variable has not yet been set, default to 'stack'.
7181 */
7182 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
7183 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7184 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
7185 iemNativeVarSetKindToStack(pReNative, idxVar);
7186
7187 /*
7188 * We have to allocate a register for the variable, even if it's a stack one,
7189 * as we don't know if there are modifications being made to it before it's
7190 * finalized (todo: analyze and insert hints about that?).
7191 *
7192 * If we can, we try to get the correct register for argument variables. This
7193 * assumes that most argument variables are fetched as close as possible
7194 * to the actual call, so that there aren't any interfering hidden calls
7195 * (memory accesses, etc.) in between.
7196 *
7197 * If we cannot, or it's a regular (non-argument) variable, we make sure no
7198 * argument registers that will be used by this MC block are allocated here,
7199 * and we always prefer non-volatile registers to avoid having to spill
7200 * stuff for internal calls.
7201 */
7202 /** @todo Detect too early argument value fetches and warn about hidden
7203 * calls causing less optimal code to be generated in the python script. */
7204
7205 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7206 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7207 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7208 {
7209 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7210 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7211 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7212 }
7213 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7214 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7215 {
7216 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7217 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7218 & ~pReNative->Core.bmHstRegsWithGstShadow
7219 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7220 & fNotArgsMask;
7221 if (fRegs)
7222 {
7223 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7224 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7225 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7226 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7227 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7228 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7229 }
7230 else
7231 {
7232 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7233 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7234 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7235 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7236 }
7237 }
7238 else
7239 {
7240 idxReg = idxRegPref;
7241 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7242 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
7243 }
7244 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7245 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7246
7247 /*
7248 * Load it off the stack if we've got a stack slot.
7249 */
7250 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7251 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7252 {
7253 Assert(fInitialized);
7254 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7255 switch (pReNative->Core.aVars[idxVar].cbVar)
7256 {
7257 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7258 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7259 case 3: AssertFailed(); RT_FALL_THRU();
7260 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7261 default: AssertFailed(); RT_FALL_THRU();
7262 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7263 }
7264 }
7265 else
7266 {
7267 Assert(idxStackSlot == UINT8_MAX);
7268 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7269 }
7270 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7271 return idxReg;
7272}
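
/*
 * Illustrative usage sketch (an assumed caller pattern, not lifted from a
 * specific emitter; idxVar and idxOtherHstReg are placeholders): the host
 * register stays pinned to the variable between the acquire and the matching
 * iemNativeVarRegisterRelease call, so code emitted in between can use it:
 *
 *      uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxOtherHstReg, idxReg);
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 */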
7273
7274
7275/**
7276 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7277 * guest register.
7278 *
7279 * This function makes sure there is a register for it and sets it to be the
7280 * current shadow copy of @a enmGstReg.
7281 *
7282 * @returns The host register number.
7283 * @param pReNative The recompiler state.
7284 * @param idxVar The variable.
7285 * @param enmGstReg The guest register this variable will be written to
7286 * after this call.
7287 * @param poff Pointer to the instruction buffer offset.
7288 * In case a register needs to be freed up or if the
7289 * variable content needs to be loaded off the stack.
7290 *
7291 * @note We DO NOT expect @a idxVar to be an argument variable,
7292 * because this function can only be used in the commit
7293 * stage of an instruction.
7294 */
7295DECL_HIDDEN_THROW(uint8_t)
7296iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7297{
7298 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7299 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7300 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
7301 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
7302 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
7303 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
7304 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7305 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7306
7307 /*
7308 * This shouldn't ever be used for arguments, unless it's in a weird else
7309 * branch that doesn't do any calling and even then it's questionable.
7310 *
7311 * However, in case someone writes crazy wrong MC code and does register
7312 * updates before making calls, just use the regular register allocator to
7313 * ensure we get a register suitable for the intended argument number.
7314 */
7315 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7316
7317 /*
7318 * If there is already a register for the variable, we transfer/set the
7319 * guest shadow copy assignment to it.
7320 */
7321 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7322 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7323 {
7324 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7325 {
7326 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7327 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7328 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7329 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7330 }
7331 else
7332 {
7333 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7334 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7335 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7336 }
7337 /** @todo figure this one out. We need some way of making sure the register isn't
7338 * modified after this point, just in case we start writing crappy MC code. */
7339 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
7340 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7341 return idxReg;
7342 }
7343 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7344
7345 /*
7346 * Because this is supposed to be the commit stage, we just tag along with the
7347 * temporary register allocator and upgrade it to a variable register.
7348 */
7349 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7350 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7351 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7352 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7353 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7354 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7355
7356 /*
7357 * Now we need to load the register value.
7358 */
7359 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
7360 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
7361 else
7362 {
7363 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7364 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7365 switch (pReNative->Core.aVars[idxVar].cbVar)
7366 {
7367 case sizeof(uint64_t):
7368 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7369 break;
7370 case sizeof(uint32_t):
7371 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7372 break;
7373 case sizeof(uint16_t):
7374 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7375 break;
7376 case sizeof(uint8_t):
7377 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7378 break;
7379 default:
7380 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7381 }
7382 }
7383
7384 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7385 return idxReg;
7386}
7387
7388
7389/**
7390 * Sets the host register for @a idxVarRc to @a idxReg.
7391 *
7392 * The register must not be allocated. Any guest register shadowing will be
7393 * implicitly dropped by this call.
7394 *
7395 * The variable must not have any register associated with it (causes
7396 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
7397 * implied.
7398 *
7399 * @returns idxReg
7400 * @param pReNative The recompiler state.
7401 * @param idxVar The variable.
7402 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
7403 * @param off For recording in debug info.
7404 *
7405 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
7406 */
7407DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
7408{
7409 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7410 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7411 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
7412 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
7413 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
7414
7415 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
7416 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7417
7418 iemNativeVarSetKindToStack(pReNative, idxVar);
7419 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7420
7421 return idxReg;
7422}
7423
7424
7425/**
7426 * A convenient helper function.
7427 */
7428DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7429 uint8_t idxReg, uint32_t *poff)
7430{
7431 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
7432 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7433 return idxReg;
7434}
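
/*
 * Illustrative sketch (an assumed usage; pfnHlp and idxVarRc are placeholders):
 * binding a call's return register to a variable and keeping it acquired until
 * the value has been consumed:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHlp);
 *      uint8_t const idxReg = iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
 *      ... emit code consuming idxReg ...
 *      iemNativeVarRegisterRelease(pReNative, idxVarRc);
 */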
7435
7436
7437/**
7438 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7439 *
7440 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7441 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7442 * requirement of flushing anything in volatile host registers when making a
7443 * call.
7444 *
7445 * @returns New @a off value.
7446 * @param pReNative The recompiler state.
7447 * @param off The code buffer position.
7448 * @param fHstRegsNotToSave Set of registers not to save & restore.
7449 */
7450DECL_INLINE_THROW(uint32_t)
7451iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7452{
7453 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7454 if (fHstRegs)
7455 {
7456 do
7457 {
7458 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7459 fHstRegs &= ~RT_BIT_32(idxHstReg);
7460
7461 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7462 {
7463 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7464 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7465 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7466 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7467 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7468 switch (pReNative->Core.aVars[idxVar].enmKind)
7469 {
7470 case kIemNativeVarKind_Stack:
7471 {
7472 /* Temporarily spill the variable register. */
7473 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7474 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7475 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7476 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7477 continue;
7478 }
7479
7480 case kIemNativeVarKind_Immediate:
7481 case kIemNativeVarKind_VarRef:
7482 case kIemNativeVarKind_GstRegRef:
7483 /* It is weird to have any of these loaded at this point. */
7484 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7485 continue;
7486
7487 case kIemNativeVarKind_End:
7488 case kIemNativeVarKind_Invalid:
7489 break;
7490 }
7491 AssertFailed();
7492 }
7493 else
7494 {
7495 /*
7496 * Allocate a temporary stack slot and spill the register to it.
7497 */
7498 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7499 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7500 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7501 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7502 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7503 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7504 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7505 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7506 }
7507 } while (fHstRegs);
7508 }
7509 return off;
7510}
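
/*
 * Illustrative sketch (an assumed TLB-miss style pattern; pfnHelper and the
 * zero fHstRegsNotToSave masks are placeholders): volatile registers are saved
 * before the helper call and restored right after it, which avoids having to
 * flush them for the call:
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0);
 */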
7511
7512
7513/**
7514 * Emit code to restore volatile registers after a call to a helper.
7515 *
7516 * @returns New @a off value.
7517 * @param pReNative The recompiler state.
7518 * @param off The code buffer position.
7519 * @param fHstRegsNotToSave Set of registers not to save & restore.
7520 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7521 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7522 */
7523DECL_INLINE_THROW(uint32_t)
7524iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7525{
7526 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7527 if (fHstRegs)
7528 {
7529 do
7530 {
7531 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7532 fHstRegs &= ~RT_BIT_32(idxHstReg);
7533
7534 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7535 {
7536 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7537 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7538 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7539 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7540 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7541 switch (pReNative->Core.aVars[idxVar].enmKind)
7542 {
7543 case kIemNativeVarKind_Stack:
7544 {
7545 /* Unspill the variable register. */
7546 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7547 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7548 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7549 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7550 continue;
7551 }
7552
7553 case kIemNativeVarKind_Immediate:
7554 case kIemNativeVarKind_VarRef:
7555 case kIemNativeVarKind_GstRegRef:
7556 /* It is weird to have any of these loaded at this point. */
7557 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7558 continue;
7559
7560 case kIemNativeVarKind_End:
7561 case kIemNativeVarKind_Invalid:
7562 break;
7563 }
7564 AssertFailed();
7565 }
7566 else
7567 {
7568 /*
7569 * Restore from temporary stack slot.
7570 */
7571 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7572 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7573 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7574 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7575
7576 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7577 }
7578 } while (fHstRegs);
7579 }
7580 return off;
7581}
7582
7583
7584/**
7585 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
7586 *
7587 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7588 */
7589DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7590{
7591 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7592 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7593 {
7594 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7595 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7596 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7597 Assert(cSlots > 0);
7598 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7599 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
7600 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7601 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7602 }
7603 else
7604 Assert(idxStackSlot == UINT8_MAX);
7605}
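
/*
 * Worked example (illustrative only): for cbVar=16 the worker above computes
 * cSlots = (16 + 7) / 8 = 2 and fAllocMask = RT_BIT_32(2) - 1 = 0x3, so the
 * two slots starting at idxStackSlot are cleared from bmStack in one go.
 */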
7606
7607
7608/**
7609 * Worker that frees a single variable.
7610 *
7611 * ASSUMES that @a idxVar is valid.
7612 */
7613DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7614{
7615 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7616 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7617 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7618
7619 /* Free the host register first if any assigned. */
7620 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7621 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7622 {
7623 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7624 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7625 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7626 }
7627
7628 /* Free argument mapping. */
7629 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7630 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7631 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7632
7633 /* Free the stack slots. */
7634 iemNativeVarFreeStackSlots(pReNative, idxVar);
7635
7636 /* Free the actual variable. */
7637 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7638 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7639}
7640
7641
7642/**
7643 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7644 */
7645DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7646{
7647 while (bmVars != 0)
7648 {
7649 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7650 bmVars &= ~RT_BIT_32(idxVar);
7651
7652#if 1 /** @todo optimize by simplifying this later... */
7653 iemNativeVarFreeOneWorker(pReNative, idxVar);
7654#else
7655 /* Only need to free the host register, the rest is done as bulk updates below. */
7656 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7657 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7658 {
7659 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7660 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7661 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7662 }
7663#endif
7664 }
7665#if 0 /** @todo optimize by simplifying this later... */
7666 pReNative->Core.bmVars = 0;
7667 pReNative->Core.bmStack = 0;
7668 pReNative->Core.u64ArgVars = UINT64_MAX;
7669#endif
7670}
7671
7672
7673/**
7674 * This is called by IEM_MC_END() to clean up all variables.
7675 */
7676DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
7677{
7678 uint32_t const bmVars = pReNative->Core.bmVars;
7679 if (bmVars != 0)
7680 iemNativeVarFreeAllSlow(pReNative, bmVars);
7681 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7682 Assert(pReNative->Core.bmStack == 0);
7683}
7684
7685
7686#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
7687
7688/**
7689 * This is called by IEM_MC_FREE_LOCAL.
7690 */
7691DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7692{
7693 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7694 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7695 iemNativeVarFreeOneWorker(pReNative, idxVar);
7696}
7697
7698
7699#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
7700
7701/**
7702 * This is called by IEM_MC_FREE_ARG.
7703 */
7704DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7705{
7706 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7707 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
7708 iemNativeVarFreeOneWorker(pReNative, idxVar);
7709}
7710
7711
7712#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
7713
7714/**
7715 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
7716 */
7717DECL_INLINE_THROW(uint32_t)
7718iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
7719{
7720 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7721 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
7722 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7723 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
7724 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
7725
7726 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
7727 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
7728 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
7729 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7730
7731 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
7732
7733 /*
7734 * Special case for immediates.
7735 */
7736 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
7737 {
7738 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7739 {
7740 case sizeof(uint16_t):
7741 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7742 break;
7743 case sizeof(uint32_t):
7744 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7745 break;
7746 default: AssertFailed(); break;
7747 }
7748 }
7749 else
7750 {
7751 /*
7752 * The generic solution for now.
7753 */
7754 /** @todo optimize this by having the python script make sure the source
7755 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
7756 * statement. Then we could just transfer the register assignments. */
7757 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
7758 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
7759 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7760 {
7761 case sizeof(uint16_t):
7762 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
7763 break;
7764 case sizeof(uint32_t):
7765 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
7766 break;
7767 default: AssertFailed(); break;
7768 }
7769 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
7770 iemNativeVarRegisterRelease(pReNative, idxVarDst);
7771 }
7772 return off;
7773}
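
/*
 * Illustrative example (the values are assumptions): assigning a 32-bit
 * immediate source holding 0x12345678 to a 16-bit destination takes the
 * immediate special case above and simply turns the destination into a
 * constant 0x5678 without emitting any code.
 */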
7774
7775
7776
7777/*********************************************************************************************************************************
7778* Emitters for IEM_MC_CALL_CIMPL_XXX *
7779*********************************************************************************************************************************/
7780
7781/**
7782 * Emits code to load a reference to the given guest register into @a idxGprDst.
7783 */
7784DECL_INLINE_THROW(uint32_t)
7785iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7786 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7787{
7788 /*
7789 * Get the offset relative to the CPUMCTX structure.
7790 */
7791 uint32_t offCpumCtx;
7792 switch (enmClass)
7793 {
7794 case kIemNativeGstRegRef_Gpr:
7795 Assert(idxRegInClass < 16);
7796 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7797 break;
7798
7799 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
7800 Assert(idxRegInClass < 4);
7801 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7802 break;
7803
7804 case kIemNativeGstRegRef_EFlags:
7805 Assert(idxRegInClass == 0);
7806 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7807 break;
7808
7809 case kIemNativeGstRegRef_MxCsr:
7810 Assert(idxRegInClass == 0);
7811 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7812 break;
7813
7814 case kIemNativeGstRegRef_FpuReg:
7815 Assert(idxRegInClass < 8);
7816 AssertFailed(); /** @todo what kind of indexing? */
7817 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7818 break;
7819
7820 case kIemNativeGstRegRef_MReg:
7821 Assert(idxRegInClass < 8);
7822 AssertFailed(); /** @todo what kind of indexing? */
7823 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7824 break;
7825
7826 case kIemNativeGstRegRef_XReg:
7827 Assert(idxRegInClass < 16);
7828 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7829 break;
7830
7831 default:
7832 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7833 }
7834
7835 /*
7836 * Load the value into the destination register.
7837 */
7838#ifdef RT_ARCH_AMD64
7839 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7840
7841#elif defined(RT_ARCH_ARM64)
7842 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7843 Assert(offCpumCtx < 4096);
7844 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7845
7846#else
7847# error "Port me!"
7848#endif
7849
7850 return off;
7851}
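
/*
 * Illustrative example (assumed inputs): for enmClass == kIemNativeGstRegRef_Gpr
 * and idxRegInClass == 3 (rBX), the code above emits an instruction that loads
 * &pVCpu->cpum.GstCtx.aGRegs[3] into idxGprDst: a lea via iemNativeEmitLeaGprByVCpu
 * on AMD64, and an add-immediate relative to IEMNATIVE_REG_FIXED_PCPUMCTX on ARM64.
 */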
7852
7853
7854/**
7855 * Common code for CIMPL and AIMPL calls.
7856 *
7857 * These are calls that use argument variables and such. They should not be
7858 * confused with internal calls required to implement an MC operation,
7859 * like a TLB load and similar.
7860 *
7861 * Upon return all that is left to do is to load any hidden arguments and
7862 * perform the call. All argument variables are freed.
7863 *
7864 * @returns New code buffer offset; throws VBox status code on error.
7865 * @param pReNative The native recompile state.
7866 * @param off The code buffer offset.
7867 * @param cArgs The total number of arguments (includes hidden
7868 * count).
7869 * @param cHiddenArgs The number of hidden arguments. The hidden
7870 * arguments must not have any variable declared for
7871 * them, whereas all the regular arguments must
7872 * (tstIEMCheckMc ensures this).
7873 */
7874DECL_HIDDEN_THROW(uint32_t)
7875iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7876{
7877#ifdef VBOX_STRICT
7878 /*
7879 * Assert sanity.
7880 */
7881 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7882 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7883 for (unsigned i = 0; i < cHiddenArgs; i++)
7884 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7885 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7886 {
7887 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7888 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7889 }
7890 iemNativeRegAssertSanity(pReNative);
7891#endif
7892
7893 /*
7894 * Before we do anything else, go over variables that are referenced and
7895 * make sure they are not in a register.
7896 */
7897 uint32_t bmVars = pReNative->Core.bmVars;
7898 if (bmVars)
7899 {
7900 do
7901 {
7902 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7903 bmVars &= ~RT_BIT_32(idxVar);
7904
7905 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7906 {
7907 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7908 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7909 {
7910 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7911 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7912 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7913 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7914 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7915
7916 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7917 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7918 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7919 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7920 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7921 }
7922 }
7923 } while (bmVars != 0);
7924#if 0 //def VBOX_STRICT
7925 iemNativeRegAssertSanity(pReNative);
7926#endif
7927 }
7928
7929 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7930
7931 /*
7932 * First, go over the host registers that will be used for arguments and make
7933 * sure they either hold the desired argument or are free.
7934 */
7935 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7936 {
7937 for (uint32_t i = 0; i < cRegArgs; i++)
7938 {
7939 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7940 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7941 {
7942 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7943 {
7944 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7945 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7946 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
7947 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7948 if (uArgNo == i)
7949 { /* perfect */ }
7950 /* The variable allocator logic should make sure this is impossible,
7951 except for when the return register is used as a parameter (ARM,
7952 but not x86). */
7953#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7954 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7955 {
7956# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7957# error "Implement this"
7958# endif
7959 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7960 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7961 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7962 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7963 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7964 }
7965#endif
7966 else
7967 {
7968 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7969
7970 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
7971 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7972 else
7973 {
7974 /* just free it, can be reloaded if used again */
7975 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7976 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7977 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7978 }
7979 }
7980 }
7981 else
7982 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7983 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7984 }
7985 }
7986#if 0 //def VBOX_STRICT
7987 iemNativeRegAssertSanity(pReNative);
7988#endif
7989 }
7990
7991 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7992
7993#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7994 /*
7995 * If there are any stack arguments, make sure they are in their place as well.
7996 *
7997 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7998 * the caller) will be loading it later and it must be free (see the first loop).
7999 */
8000 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8001 {
8002 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8003 {
8004 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8005 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8006 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8007 {
8008 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8009 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
8010 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
8011 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8012 }
8013 else
8014 {
8015 /* Use ARG0 as temp for stuff we need registers for. */
8016 switch (pReNative->Core.aVars[idxVar].enmKind)
8017 {
8018 case kIemNativeVarKind_Stack:
8019 {
8020 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8021 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8022 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8023 iemNativeStackCalcBpDisp(idxStackSlot));
8024 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8025 continue;
8026 }
8027
8028 case kIemNativeVarKind_Immediate:
8029 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
8030 continue;
8031
8032 case kIemNativeVarKind_VarRef:
8033 {
8034 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8035 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8036 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8037 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8038 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8039 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8040 {
8041 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8042 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8043 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8044 }
8045 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8046 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8047 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8048 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8049 continue;
8050 }
8051
8052 case kIemNativeVarKind_GstRegRef:
8053 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8054 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8055 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8056 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8057 continue;
8058
8059 case kIemNativeVarKind_Invalid:
8060 case kIemNativeVarKind_End:
8061 break;
8062 }
8063 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8064 }
8065 }
8066# if 0 //def VBOX_STRICT
8067 iemNativeRegAssertSanity(pReNative);
8068# endif
8069 }
8070#else
8071 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8072#endif
8073
8074 /*
8075 * Make sure the argument variables are loaded into their respective registers.
8076 *
8077 * We can optimize this by ASSUMING that any register allocations are for
8078 * registers that have already been loaded and are ready. The previous step
8079 * saw to that.
8080 */
8081 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8082 {
8083 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8084 {
8085 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8086 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8087 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
8088 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8089 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8090 else
8091 {
8092 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8093 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8094 {
8095 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
8096 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
8097 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
8098 | RT_BIT_32(idxArgReg);
8099 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
8100 }
8101 else
8102 {
8103 /* Use ARG0 as temp for stuff we need registers for. */
8104 switch (pReNative->Core.aVars[idxVar].enmKind)
8105 {
8106 case kIemNativeVarKind_Stack:
8107 {
8108 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8109 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8110 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8111 continue;
8112 }
8113
8114 case kIemNativeVarKind_Immediate:
8115 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
8116 continue;
8117
8118 case kIemNativeVarKind_VarRef:
8119 {
8120 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8121 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8122 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8123 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8124 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8125 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8126 {
8127 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8128 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8129 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8130 }
8131 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8132 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8133 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8134 continue;
8135 }
8136
8137 case kIemNativeVarKind_GstRegRef:
8138 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8139 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8140 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8141 continue;
8142
8143 case kIemNativeVarKind_Invalid:
8144 case kIemNativeVarKind_End:
8145 break;
8146 }
8147 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8148 }
8149 }
8150 }
8151#if 0 //def VBOX_STRICT
8152 iemNativeRegAssertSanity(pReNative);
8153#endif
8154 }
8155#ifdef VBOX_STRICT
8156 else
8157 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8158 {
8159 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8160 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8161 }
8162#endif
8163
8164 /*
8165 * Free all argument variables (simplified).
8166 * Their lifetime always expires with the call they are for.
8167 */
8168 /** @todo Make the python script check that arguments aren't used after
8169 * IEM_MC_CALL_XXXX. */
8170 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8171 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8172 * an argument value. There is also some FPU stuff. */
8173 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8174 {
8175 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8176 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8177
8178 /* no need to free registers: */
8179 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8180 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8181 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8182 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8183 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8184 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8185
8186 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8187 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8188 iemNativeVarFreeStackSlots(pReNative, idxVar);
8189 }
8190 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8191
8192 /*
8193 * Flush volatile registers as we make the call.
8194 */
8195 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8196
8197 return off;
8198}
8199
8200
8201/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
8202DECL_HIDDEN_THROW(uint32_t)
8203iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
8204 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
8205
8206{
8207 /*
8208 * Do all the call setup and cleanup.
8209 */
8210 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
8211
8212 /*
8213 * Load the two or three hidden arguments.
8214 */
8215#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8216 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
8217 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8218 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
8219#else
8220 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8221 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
8222#endif
8223
8224 /*
8225 * Make the call and check the return code.
8226 *
8227 * Shadow PC copies are always flushed here, other stuff depends on flags.
8228 * Segment and general purpose registers are explicitly flushed via the
8229 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
8230 * macros.
8231 */
8232 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
8233#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8234 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
8235#endif
8236 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
8237 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
8238 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
8239 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
8240
8241 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
8242}
8243
8244
8245#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
8246 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
8247
8248/** Emits code for IEM_MC_CALL_CIMPL_1. */
8249DECL_INLINE_THROW(uint32_t)
8250iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8251 uintptr_t pfnCImpl, uint8_t idxArg0)
8252{
8253 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8254 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
8255}
8256
8257
8258#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
8259 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
8260
8261/** Emits code for IEM_MC_CALL_CIMPL_2. */
8262DECL_INLINE_THROW(uint32_t)
8263iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8264 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
8265{
8266 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8267 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8268 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
8269}
8270
8271
8272#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
8273 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8274 (uintptr_t)a_pfnCImpl, a0, a1, a2)
8275
8276/** Emits code for IEM_MC_CALL_CIMPL_3. */
8277DECL_INLINE_THROW(uint32_t)
8278iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8279 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8280{
8281 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8282 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8283 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8284 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
8285}
8286
8287
8288#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
8289 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8290 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
8291
8292/** Emits code for IEM_MC_CALL_CIMPL_4. */
8293DECL_INLINE_THROW(uint32_t)
8294iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8295 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8296{
8297 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8298 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8299 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8300 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8301 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
8302}
8303
8304
8305#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
8306 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8307 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
8308
8309/** Emits code for IEM_MC_CALL_CIMPL_5. */
8310DECL_INLINE_THROW(uint32_t)
8311iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8312 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
8313{
8314 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8315 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8316 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8317 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8318 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
8319 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
8320}
8321
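/*
 * Illustration only (not built): why the argument variables above are checked
 * at slot N + IEM_CIMPL_HIDDEN_ARGS.  The C implementation receives pVCpu and
 * the instruction length as hidden leading arguments, so the IEM_MC arguments
 * follow after them.  The cast and the uArg0/uArg1 names below are
 * placeholders, not the recompiler's actual locals.
 */
#if 0 /* sketch of what a recompiled IEM_MC_CALL_CIMPL_2 amounts to */
    VBOXSTRICTRC rcStrict = ((VBOXSTRICTRC (*)(PVMCPUCC, uint8_t, uint64_t, uint64_t))pfnCImpl)(
                                pVCpu,      /* hidden arg 0 */
                                cbInstr,    /* hidden arg 1 */
                                uArg0,      /* IEM_MC argument 0 -> slot 0 + IEM_CIMPL_HIDDEN_ARGS */
                                uArg1);     /* IEM_MC argument 1 -> slot 1 + IEM_CIMPL_HIDDEN_ARGS */
#endif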
8322
8323/** Recompiler debugging: Flush guest register shadow copies. */
8324#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
8325
8326
8327
8328/*********************************************************************************************************************************
8329* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
8330*********************************************************************************************************************************/
8331
8332/**
8333 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
8334 */
8335DECL_INLINE_THROW(uint32_t)
8336iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8337 uintptr_t pfnAImpl, uint8_t cArgs)
8338{
8339 if (idxVarRc != UINT8_MAX)
8340 {
8341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
8342 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
8343 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
8344 }
8345
8346 /*
8347 * Do all the call setup and cleanup.
8348 */
8349 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
8350
8351 /*
8352 * Make the call and update the return code variable if we've got one.
8353 */
8354 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8355 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
8356 {
8357        pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
8358 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
8359 }
8360
8361 return off;
8362}
8363
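/*
 * Illustration only (not built): the shape of an IEM_MC_CALL_AIMPL_2 call as
 * produced by the common worker above.  No move is emitted for the result;
 * iemNativeVarRegisterSet() merely records that the return variable now lives
 * in IEMNATIVE_CALL_RET_GREG.  The cast and uArg0/uArg1 are placeholders.
 */
#if 0
    uint64_t const uRet = ((uint64_t (*)(uint64_t, uint64_t))pfnAImpl)(uArg0, uArg1);
    /* uRet sits in the host ABI return register, which is exactly where the
       result variable is said to live afterwards. */
#endif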
8364
8365
8366#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
8367 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
8368
8369#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
8370 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
8371
8372/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
8373DECL_INLINE_THROW(uint32_t)
8374iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
8375{
8376 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
8377}
8378
8379
8380#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
8381 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
8382
8383#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
8384 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
8385
8386/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
8387DECL_INLINE_THROW(uint32_t)
8388iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
8389{
8390 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8391 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
8392}
8393
8394
8395#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
8396 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
8397
8398#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
8399 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
8400
8401/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
8402DECL_INLINE_THROW(uint32_t)
8403iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8404 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8405{
8406 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8407 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8408 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
8409}
8410
8411
8412#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
8413 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
8414
8415#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
8416 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
8417
8418/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
8419DECL_INLINE_THROW(uint32_t)
8420iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8421 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8422{
8423 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8424 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8425 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8426 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
8427}
8428
8429
8430#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
8431 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8432
8433#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
8434 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8435
8436/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
8437DECL_INLINE_THROW(uint32_t)
8438iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8439 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8440{
8441 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8442 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8443 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8444 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
8445 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
8446}
8447
8448
8449
8450/*********************************************************************************************************************************
8451* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
8452*********************************************************************************************************************************/
8453
8454#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
8455 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
8456
8457#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8458 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
8459
8460#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8461 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
8462
8463#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8464 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
8465
8466
8467/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
8468 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
8469DECL_INLINE_THROW(uint32_t)
8470iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
8471{
8472 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8473 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8474 Assert(iGRegEx < 20);
8475
8476 /* Same discussion as in iemNativeEmitFetchGregU16 */
8477 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8478 kIemNativeGstRegUse_ReadOnly);
8479
8480 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8481 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8482
8483 /* The value is zero-extended to the full 64-bit host register width. */
8484 if (iGRegEx < 16)
8485 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8486 else
8487 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8488
8489 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8490 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8491 return off;
8492}
8493
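/*
 * Illustration only (not built): a minimal C model of what the code emitted
 * above computes, assuming u64GstReg is the full 64-bit guest GPR value.
 */
#if 0
static uint64_t iemNativeSketchFetchGregU8(uint64_t u64GstReg, uint8_t iGRegEx)
{
    /* Indexes 0..15 address the low byte, 16..19 the AH/CH/DH/BH style high byte;
       the result is zero-extended to the full host register width either way. */
    return iGRegEx < 16 ? (uint8_t)u64GstReg : (uint8_t)(u64GstReg >> 8);
}
#endif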
8494
8495#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8496 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
8497
8498#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8499 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
8500
8501#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8502 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
8503
8504/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
8505DECL_INLINE_THROW(uint32_t)
8506iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
8507{
8508 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8509 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8510 Assert(iGRegEx < 20);
8511
8512 /* Same discussion as in iemNativeEmitFetchGregU16 */
8513 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8514 kIemNativeGstRegUse_ReadOnly);
8515
8516 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8517 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8518
8519 if (iGRegEx < 16)
8520 {
8521 switch (cbSignExtended)
8522 {
8523 case sizeof(uint16_t):
8524 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8525 break;
8526 case sizeof(uint32_t):
8527 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8528 break;
8529 case sizeof(uint64_t):
8530 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8531 break;
8532 default: AssertFailed(); break;
8533 }
8534 }
8535 else
8536 {
8537 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8538 switch (cbSignExtended)
8539 {
8540 case sizeof(uint16_t):
8541 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8542 break;
8543 case sizeof(uint32_t):
8544 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8545 break;
8546 case sizeof(uint64_t):
8547 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8548 break;
8549 default: AssertFailed(); break;
8550 }
8551 }
8552
8553 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8554 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8555 return off;
8556}
8557
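/*
 * Illustration only (not built): for the high-byte registers the emitter above
 * first moves bits 15:8 down and then sign-extends from 8 bits, equivalent to:
 */
#if 0
static uint64_t iemNativeSketchFetchGregU8Sx(uint64_t u64GstReg, uint8_t iGRegEx)
{
    uint8_t const u8Value = iGRegEx < 16 ? (uint8_t)u64GstReg : (uint8_t)(u64GstReg >> 8);
    return (uint64_t)(int64_t)(int8_t)u8Value; /* sign-extended; the narrower variants truncate this */
}
#endif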
8558
8559
8560#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
8561 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
8562
8563#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
8564 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8565
8566#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
8567 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8568
8569/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
8570DECL_INLINE_THROW(uint32_t)
8571iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8572{
8573 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8574 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8575 Assert(iGReg < 16);
8576
8577 /*
8578     * We can either just load the low 16 bits of the GPR into a host register
8579 * for the variable, or we can do so via a shadow copy host register. The
8580 * latter will avoid having to reload it if it's being stored later, but
8581 * will waste a host register if it isn't touched again. Since we don't
8582     * know what's going to happen, we choose the latter for now.
8583 */
8584 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8585 kIemNativeGstRegUse_ReadOnly);
8586
8587 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8588 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8589 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8590 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8591
8592 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8593 return off;
8594}
8595
8596
8597#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
8598 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8599
8600#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
8601 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8602
8603/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
8604DECL_INLINE_THROW(uint32_t)
8605iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
8606{
8607 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8608 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8609 Assert(iGReg < 16);
8610
8611 /*
8612     * We can either just load the low 16 bits of the GPR into a host register
8613 * for the variable, or we can do so via a shadow copy host register. The
8614 * latter will avoid having to reload it if it's being stored later, but
8615 * will waste a host register if it isn't touched again. Since we don't
8616     * know what's going to happen, we choose the latter for now.
8617 */
8618 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8619 kIemNativeGstRegUse_ReadOnly);
8620
8621 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8622 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8623 if (cbSignExtended == sizeof(uint32_t))
8624 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8625 else
8626 {
8627 Assert(cbSignExtended == sizeof(uint64_t));
8628 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8629 }
8630 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8631
8632 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8633 return off;
8634}
8635
8636
8637#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
8638 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
8639
8640#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
8641 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
8642
8643/** Emits code for IEM_MC_FETCH_GREG_U32. */
8644DECL_INLINE_THROW(uint32_t)
8645iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8646{
8647 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8648 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
8649 Assert(iGReg < 16);
8650
8651 /*
8652     * We can either just load the low 32 bits of the GPR into a host register
8653 * for the variable, or we can do so via a shadow copy host register. The
8654 * latter will avoid having to reload it if it's being stored later, but
8655 * will waste a host register if it isn't touched again. Since we don't
8656     * know what's going to happen, we choose the latter for now.
8657 */
8658 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8659 kIemNativeGstRegUse_ReadOnly);
8660
8661 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8662 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8663 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8664 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8665
8666 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8667 return off;
8668}
8669
8670
8671#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
8672 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
8673
8674/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
8675DECL_INLINE_THROW(uint32_t)
8676iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8677{
8678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8679 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8680 Assert(iGReg < 16);
8681
8682 /*
8683     * We can either just load the low 32 bits of the GPR into a host register
8684 * for the variable, or we can do so via a shadow copy host register. The
8685 * latter will avoid having to reload it if it's being stored later, but
8686 * will waste a host register if it isn't touched again. Since we don't
8687     * know what's going to happen, we choose the latter for now.
8688 */
8689 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8690 kIemNativeGstRegUse_ReadOnly);
8691
8692 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8693 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8694 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8695 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8696
8697 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8698 return off;
8699}
8700
8701
8702#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
8703 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8704
8705#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
8706 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8707
8708/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
8709 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
8710DECL_INLINE_THROW(uint32_t)
8711iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8712{
8713 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8714 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8715 Assert(iGReg < 16);
8716
8717 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8718 kIemNativeGstRegUse_ReadOnly);
8719
8720 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8721 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8722 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
8723 /** @todo name the register a shadow one already? */
8724 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8725
8726 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8727 return off;
8728}
8729
8730
8731
8732/*********************************************************************************************************************************
8733* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
8734*********************************************************************************************************************************/
8735
8736#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
8737 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
8738
8739/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
8740DECL_INLINE_THROW(uint32_t)
8741iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
8742{
8743 Assert(iGRegEx < 20);
8744 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8745 kIemNativeGstRegUse_ForUpdate);
8746#ifdef RT_ARCH_AMD64
8747 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8748
8749 /* To the lowest byte of the register: mov r8, imm8 */
8750 if (iGRegEx < 16)
8751 {
8752 if (idxGstTmpReg >= 8)
8753 pbCodeBuf[off++] = X86_OP_REX_B;
8754 else if (idxGstTmpReg >= 4)
8755 pbCodeBuf[off++] = X86_OP_REX;
8756 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8757 pbCodeBuf[off++] = u8Value;
8758 }
8759 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
8760 else if (idxGstTmpReg < 4)
8761 {
8762 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
8763 pbCodeBuf[off++] = u8Value;
8764 }
8765 else
8766 {
8767 /* ror reg64, 8 */
8768 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8769 pbCodeBuf[off++] = 0xc1;
8770 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8771 pbCodeBuf[off++] = 8;
8772
8773 /* mov reg8, imm8 */
8774 if (idxGstTmpReg >= 8)
8775 pbCodeBuf[off++] = X86_OP_REX_B;
8776 else if (idxGstTmpReg >= 4)
8777 pbCodeBuf[off++] = X86_OP_REX;
8778 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8779 pbCodeBuf[off++] = u8Value;
8780
8781 /* rol reg64, 8 */
8782 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8783 pbCodeBuf[off++] = 0xc1;
8784 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8785 pbCodeBuf[off++] = 8;
8786 }
8787
8788#elif defined(RT_ARCH_ARM64)
8789 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
8790 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8791 if (iGRegEx < 16)
8792 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
8793 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
8794 else
8795 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
8796 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
8797 iemNativeRegFreeTmp(pReNative, idxImmReg);
8798
8799#else
8800# error "Port me!"
8801#endif
8802
8803 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8804
8805 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8806
8807 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8808 return off;
8809}
8810
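/*
 * Illustration only (not built): the AMD64 ror/mov/rol trick and the ARM64 BFI
 * above implement the same partial-register update, i.e.:
 */
#if 0
static uint64_t iemNativeSketchStoreGregU8Const(uint64_t u64GstReg, uint8_t iGRegEx, uint8_t u8Value)
{
    if (iGRegEx < 16)
        return (u64GstReg & ~(uint64_t)0x00ff) | u8Value;                   /* al..r15b */
    return     (u64GstReg & ~(uint64_t)0xff00) | ((uint64_t)u8Value << 8);  /* ah, ch, dh, bh */
}
#endif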
8811
8812#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
8813 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
8814
8815/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
8816DECL_INLINE_THROW(uint32_t)
8817iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
8818{
8819 Assert(iGRegEx < 20);
8820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8821
8822 /*
8823     * If it's a constant value (unlikely) we treat this as an
8824 * IEM_MC_STORE_GREG_U8_CONST statement.
8825 */
8826 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8827 { /* likely */ }
8828 else
8829 {
8830 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8831 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8832 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8833 }
8834
8835 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8836 kIemNativeGstRegUse_ForUpdate);
8837 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8838
8839#ifdef RT_ARCH_AMD64
8840 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
8841 if (iGRegEx < 16)
8842 {
8843 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8844 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8845 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8846 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8847 pbCodeBuf[off++] = X86_OP_REX;
8848 pbCodeBuf[off++] = 0x8a;
8849 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8850 }
8851 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
8852 else if (idxGstTmpReg < 4 && idxVarReg < 4)
8853 {
8854 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
8855 pbCodeBuf[off++] = 0x8a;
8856 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
8857 }
8858 else
8859 {
8860 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
8861
8862 /* ror reg64, 8 */
8863 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8864 pbCodeBuf[off++] = 0xc1;
8865 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8866 pbCodeBuf[off++] = 8;
8867
8868 /* mov reg8, reg8(r/m) */
8869 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8870 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8871 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8872 pbCodeBuf[off++] = X86_OP_REX;
8873 pbCodeBuf[off++] = 0x8a;
8874 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8875
8876 /* rol reg64, 8 */
8877 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8878 pbCodeBuf[off++] = 0xc1;
8879 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8880 pbCodeBuf[off++] = 8;
8881 }
8882
8883#elif defined(RT_ARCH_ARM64)
8884 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
8885 or
8886 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
8887 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8888 if (iGRegEx < 16)
8889 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
8890 else
8891 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
8892
8893#else
8894# error "Port me!"
8895#endif
8896 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8897
8898 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8899
8900 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8901 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8902 return off;
8903}
8904
8905
8906
8907#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
8908 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
8909
8910/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
8911DECL_INLINE_THROW(uint32_t)
8912iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
8913{
8914 Assert(iGReg < 16);
8915 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8916 kIemNativeGstRegUse_ForUpdate);
8917#ifdef RT_ARCH_AMD64
8918 /* mov reg16, imm16 */
8919 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8920 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8921 if (idxGstTmpReg >= 8)
8922 pbCodeBuf[off++] = X86_OP_REX_B;
8923 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
8924 pbCodeBuf[off++] = RT_BYTE1(uValue);
8925 pbCodeBuf[off++] = RT_BYTE2(uValue);
8926
8927#elif defined(RT_ARCH_ARM64)
8928 /* movk xdst, #uValue, lsl #0 */
8929 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8930 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
8931
8932#else
8933# error "Port me!"
8934#endif
8935
8936 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8937
8938 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8939 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8940 return off;
8941}
8942
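/*
 * Illustration only (not built): 'mov reg16, imm16' on AMD64 and 'movk' on
 * ARM64 both leave bits 63:16 untouched, so the net effect is:
 */
#if 0
static uint64_t iemNativeSketchStoreGregU16Const(uint64_t u64GstReg, uint16_t u16Value)
{
    return (u64GstReg & ~(uint64_t)0xffff) | u16Value;
}
#endif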
8943
8944#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
8945 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
8946
8947/** Emits code for IEM_MC_STORE_GREG_U16. */
8948DECL_INLINE_THROW(uint32_t)
8949iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8950{
8951 Assert(iGReg < 16);
8952 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8953
8954 /*
8955     * If it's a constant value (unlikely) we treat this as an
8956 * IEM_MC_STORE_GREG_U16_CONST statement.
8957 */
8958 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8959 { /* likely */ }
8960 else
8961 {
8962 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8963 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8964 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8965 }
8966
8967 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8968 kIemNativeGstRegUse_ForUpdate);
8969
8970#ifdef RT_ARCH_AMD64
8971 /* mov reg16, reg16 or [mem16] */
8972 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8973 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8974 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8975 {
8976 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
8977 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
8978 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
8979 pbCodeBuf[off++] = 0x8b;
8980 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
8981 }
8982 else
8983 {
8984 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
8985 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8986 if (idxGstTmpReg >= 8)
8987 pbCodeBuf[off++] = X86_OP_REX_R;
8988 pbCodeBuf[off++] = 0x8b;
8989 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8990 }
8991
8992#elif defined(RT_ARCH_ARM64)
8993 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
8994 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8995 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8996 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
8997 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8998
8999#else
9000# error "Port me!"
9001#endif
9002
9003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9004
9005 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9006 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9007 return off;
9008}
9009
9010
9011#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
9012 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
9013
9014/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
9015DECL_INLINE_THROW(uint32_t)
9016iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
9017{
9018 Assert(iGReg < 16);
9019 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9020 kIemNativeGstRegUse_ForFullWrite);
9021 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9022 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9023 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9024 return off;
9025}
9026
9027
9028#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
9029 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
9030
9031/** Emits code for IEM_MC_STORE_GREG_U32. */
9032DECL_INLINE_THROW(uint32_t)
9033iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9034{
9035 Assert(iGReg < 16);
9036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9037
9038 /*
9039     * If it's a constant value (unlikely) we treat this as an
9040 * IEM_MC_STORE_GREG_U32_CONST statement.
9041 */
9042 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9043 { /* likely */ }
9044 else
9045 {
9046 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9047 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9048 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9049 }
9050
9051 /*
9052     * For the rest we allocate a guest register for the variable and write
9053 * it to the CPUMCTX structure.
9054 */
9055 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9056 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9057#ifdef VBOX_STRICT
9058 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
9059#endif
9060 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9061 return off;
9062}
9063
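/*
 * Illustration only (not built): a 32-bit GPR write clears bits 63:32 on x86,
 * so the variable's host register must already be zero-extended here; that is
 * what the strict check above verifies.  Net effect:
 */
#if 0
static uint64_t iemNativeSketchStoreGregU32(uint32_t u32Value)
{
    return (uint64_t)u32Value; /* bits 63:32 become zero, matching the architectural rule */
}
#endif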
9064
9065#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
9066 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
9067
9068/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
9069DECL_INLINE_THROW(uint32_t)
9070iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
9071{
9072 Assert(iGReg < 16);
9073 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9074 kIemNativeGstRegUse_ForFullWrite);
9075 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9076 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9077 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9078 return off;
9079}
9080
9081
9082#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
9083 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
9084
9085/** Emits code for IEM_MC_STORE_GREG_U64. */
9086DECL_INLINE_THROW(uint32_t)
9087iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9088{
9089 Assert(iGReg < 16);
9090 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9091
9092 /*
9093     * If it's a constant value (unlikely) we treat this as an
9094 * IEM_MC_STORE_GREG_U64_CONST statement.
9095 */
9096 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9097 { /* likely */ }
9098 else
9099 {
9100 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9101 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9102 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
9103 }
9104
9105 /*
9106     * For the rest we allocate a guest register for the variable and write
9107 * it to the CPUMCTX structure.
9108 */
9109 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9110 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9111 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9112 return off;
9113}
9114
9115
9116#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
9117 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
9118
9119/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
9120DECL_INLINE_THROW(uint32_t)
9121iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
9122{
9123 Assert(iGReg < 16);
9124 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9125 kIemNativeGstRegUse_ForUpdate);
9126 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
9127 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9128 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9129 return off;
9130}
9131
9132
9133/*********************************************************************************************************************************
9134* General purpose register manipulation (add, sub). *
9135*********************************************************************************************************************************/
9136
9137#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
9138    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
9139
9140/** Emits code for IEM_MC_ADD_GREG_U16. */
9141DECL_INLINE_THROW(uint32_t)
9142iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
9143{
9144 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9145 kIemNativeGstRegUse_ForUpdate);
9146
9147#ifdef RT_ARCH_AMD64
9148 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9149 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9150 if (idxGstTmpReg >= 8)
9151 pbCodeBuf[off++] = X86_OP_REX_B;
9152 if (uAddend == 1)
9153 {
9154 pbCodeBuf[off++] = 0xff; /* inc */
9155 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9156 }
9157 else
9158 {
9159 pbCodeBuf[off++] = 0x81;
9160 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9161 pbCodeBuf[off++] = uAddend;
9162 pbCodeBuf[off++] = 0;
9163 }
9164
9165#else
9166 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9167 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9168
9169    /* add tmp, gstgrp, uAddend */
9170 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
9171
9172 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
9173 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9174
9175 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9176#endif
9177
9178 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9179
9180 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9181
9182 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9183 return off;
9184}
9185
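/*
 * Illustration only (not built): a 16-bit add must leave bits 63:16 untouched,
 * which is what the 0x66-prefixed add on AMD64 and the add+BFI pair on ARM64
 * both achieve:
 */
#if 0
static uint64_t iemNativeSketchAddGregU16(uint64_t u64GstReg, uint8_t uAddend)
{
    return (u64GstReg & ~(uint64_t)0xffff) | (uint16_t)((uint16_t)u64GstReg + uAddend);
}
#endif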
9186
9187#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
9188 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9189
9190#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
9191 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9192
9193/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
9194DECL_INLINE_THROW(uint32_t)
9195iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
9196{
9197 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9198 kIemNativeGstRegUse_ForUpdate);
9199
9200#ifdef RT_ARCH_AMD64
9201 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9202 if (f64Bit)
9203 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9204 else if (idxGstTmpReg >= 8)
9205 pbCodeBuf[off++] = X86_OP_REX_B;
9206 if (uAddend == 1)
9207 {
9208 pbCodeBuf[off++] = 0xff; /* inc */
9209 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9210 }
9211 else if (uAddend < 128)
9212 {
9213 pbCodeBuf[off++] = 0x83; /* add */
9214 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9215 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9216 }
9217 else
9218 {
9219 pbCodeBuf[off++] = 0x81; /* add */
9220 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9221 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9222 pbCodeBuf[off++] = 0;
9223 pbCodeBuf[off++] = 0;
9224 pbCodeBuf[off++] = 0;
9225 }
9226
9227#else
9228    /* add gstgrp, gstgrp, uAddend */
9229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9230 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
9231
9232#endif
9233
9234 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9235
9236 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9237
9238 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9239 return off;
9240}
9241
9242
9243
9244#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9245 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9246
9247/** Emits code for IEM_MC_SUB_GREG_U16. */
9248DECL_INLINE_THROW(uint32_t)
9249iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
9250{
9251 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9252 kIemNativeGstRegUse_ForUpdate);
9253
9254#ifdef RT_ARCH_AMD64
9255 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9256 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9257 if (idxGstTmpReg >= 8)
9258 pbCodeBuf[off++] = X86_OP_REX_B;
9259 if (uSubtrahend == 1)
9260 {
9261 pbCodeBuf[off++] = 0xff; /* dec */
9262 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9263 }
9264 else
9265 {
9266 pbCodeBuf[off++] = 0x81;
9267 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9268 pbCodeBuf[off++] = uSubtrahend;
9269 pbCodeBuf[off++] = 0;
9270 }
9271
9272#else
9273 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9274 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9275
9276 /* sub tmp, gstgrp, uSubtrahend */
9277 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
9278
9279 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
9280 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9281
9282 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9283#endif
9284
9285 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9286
9287 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9288
9289 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9290 return off;
9291}
9292
9293
9294#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
9295 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9296
9297#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
9298 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9299
9300/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
9301DECL_INLINE_THROW(uint32_t)
9302iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
9303{
9304 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9305 kIemNativeGstRegUse_ForUpdate);
9306
9307#ifdef RT_ARCH_AMD64
9308 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9309 if (f64Bit)
9310 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9311 else if (idxGstTmpReg >= 8)
9312 pbCodeBuf[off++] = X86_OP_REX_B;
9313 if (uSubtrahend == 1)
9314 {
9315 pbCodeBuf[off++] = 0xff; /* dec */
9316 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9317 }
9318 else if (uSubtrahend < 128)
9319 {
9320 pbCodeBuf[off++] = 0x83; /* sub */
9321 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9322 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9323 }
9324 else
9325 {
9326 pbCodeBuf[off++] = 0x81; /* sub */
9327 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9328 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9329 pbCodeBuf[off++] = 0;
9330 pbCodeBuf[off++] = 0;
9331 pbCodeBuf[off++] = 0;
9332 }
9333
9334#else
9335    /* sub gstgrp, gstgrp, uSubtrahend */
9336 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9337 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
9338
9339#endif
9340
9341 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9342
9343 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9344
9345 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9346 return off;
9347}
9348
9349
9350
9351/*********************************************************************************************************************************
9352* EFLAGS *
9353*********************************************************************************************************************************/
9354
9355#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
9356 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
9357
9358/** Handles IEM_MC_FETCH_EFLAGS. */
9359DECL_INLINE_THROW(uint32_t)
9360iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
9361{
9362 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9363 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9364
9365 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
9366 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9367 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
9368 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9369 return off;
9370}
9371
9372
9373#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
9374 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
9375
9376/** Handles IEM_MC_COMMIT_EFLAGS. */
9377DECL_INLINE_THROW(uint32_t)
9378iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
9379{
9380 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9381 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9382
9383 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
9384
9385#ifdef VBOX_STRICT
9386 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
9387 uint32_t offFixup = off;
9388 off = iemNativeEmitJnzToFixed(pReNative, off, off);
9389 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
9390 iemNativeFixupFixedJump(pReNative, offFixup, off);
9391
9392 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
9393 offFixup = off;
9394 off = iemNativeEmitJzToFixed(pReNative, off, off);
9395 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
9396 iemNativeFixupFixedJump(pReNative, offFixup, off);
9397#endif
9398
9399 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9400 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
9401 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9402 return off;
9403}
9404
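/*
 * Illustration only (not built): the VBOX_STRICT code above emits the native
 * equivalent of these sanity checks on the value being committed (fEFlags is a
 * placeholder name); the 0x2001/0x2002 breakpoints fire if they fail.
 */
#if 0
    Assert(  fEFlags & X86_EFL_RA1_MASK);                                 /* reserved bit that must be one */
    Assert(!(fEFlags & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32)));   /* reserved-as-zero bits must be clear */
#endif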
9405
9406
9407/*********************************************************************************************************************************
9408* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
9409*********************************************************************************************************************************/
9410
9411#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
9412 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
9413
9414#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
9415 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
9416
9417#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
9418 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
9419
9420
9421/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
9422 * IEM_MC_FETCH_SREG_ZX_U64. */
9423DECL_INLINE_THROW(uint32_t)
9424iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
9425{
9426 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9427 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
9428 Assert(iSReg < X86_SREG_COUNT);
9429
9430 /*
9431     * For now, we will not create a shadow copy of a selector.  The rationale
9432     * is that since we do not recompile the popping and loading of segment
9433     * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
9434     * and moving to registers, there is only a small chance that the shadow
9435     * copy will be accessed again before the register is reloaded.  One
9436     * scenario would be nested calls in 16-bit code, but I doubt it's worth
9437     * the extra register pressure atm.
9438     *
9439     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
9440     * and iemNativeVarRegisterAcquire for a load scenario.  We only have the
9441     * store scenario covered at present (r160730).
9442 */
9443 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9444 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9445 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
9446 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9447 return off;
9448}
9449
9450
9451
9452/*********************************************************************************************************************************
9453* Register references. *
9454*********************************************************************************************************************************/
9455
9456#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
9457 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
9458
9459#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
9460 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
9461
9462/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
9463DECL_INLINE_THROW(uint32_t)
9464iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
9465{
9466 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9467 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9468 Assert(iGRegEx < 20);
9469
9470 if (iGRegEx < 16)
9471 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9472 else
9473 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
9474
9475 /* If we've delayed writing back the register value, flush it now. */
9476 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9477
9478 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9479 if (!fConst)
9480 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
9481
9482 return off;
9483}
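
/*
 * Illustration only (not built): a non-const reference is flushed because the
 * consumer writes straight through the pointer into the guest context, as in
 * this sketch (names are placeholders), leaving any host register that still
 * shadows the GPR stale - hence the iemNativeRegFlushGuestShadows call above.
 */
#if 0
    uint64_t *pu64Dst = &pVCpu->cpum.GstCtx.aGRegs[iGRegEx & 15].u64; /* what the reference resolves to */
    *pu64Dst ^= RT_BIT_64(0);                                         /* e.g. an AIMPL worker updating it in memory */
#endif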
9484
9485#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
9486 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
9487
9488#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
9489 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
9490
9491#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
9492 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
9493
9494#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
9495 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
9496
9497#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
9498 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
9499
9500#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
9501 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
9502
9503#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
9504 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
9505
9506#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
9507 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
9508
9509#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
9510 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
9511
9512#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
9513 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
9514
9515/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
9516DECL_INLINE_THROW(uint32_t)
9517iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
9518{
9519 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9520 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9521 Assert(iGReg < 16);
9522
9523 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
9524
9525 /* If we've delayed writing back the register value, flush it now. */
9526 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
9527
9528 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9529 if (!fConst)
9530 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
9531
9532 return off;
9533}
9534
9535
9536#define IEM_MC_REF_EFLAGS(a_pEFlags) \
9537 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
9538
9539/** Handles IEM_MC_REF_EFLAGS. */
9540DECL_INLINE_THROW(uint32_t)
9541iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
9542{
9543 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9544 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9545
9546 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
9547
9548 /* If we've delayed writing back the register value, flush it now. */
9549 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
9550
9551 /* If there is a shadow copy of guest EFLAGS, flush it now. */
9552 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
9553
9554 return off;
9555}
9556
9557
9558/*********************************************************************************************************************************
9559* Effective Address Calculation *
9560*********************************************************************************************************************************/
9561#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
9562 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
9563
9564/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
9565 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
9566DECL_INLINE_THROW(uint32_t)
9567iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9568 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
9569{
9570 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9571
9572 /*
9573 * Handle the disp16 form with no registers first.
9574 *
9575 * Convert to an immediate value, as that'll delay the register allocation
9576 * and assignment till the memory access / call / whatever and we can use
9577 * a more appropriate register (or none at all).
9578 */
9579 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
9580 {
9581 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
9582 return off;
9583 }
9584
9585    /* Determine the displacement. */
9586 uint16_t u16EffAddr;
9587 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9588 {
9589 case 0: u16EffAddr = 0; break;
9590 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
9591 case 2: u16EffAddr = u16Disp; break;
9592 default: AssertFailedStmt(u16EffAddr = 0);
9593 }
9594
9595 /* Determine the registers involved. */
9596 uint8_t idxGstRegBase;
9597 uint8_t idxGstRegIndex;
9598 switch (bRm & X86_MODRM_RM_MASK)
9599 {
9600 case 0:
9601 idxGstRegBase = X86_GREG_xBX;
9602 idxGstRegIndex = X86_GREG_xSI;
9603 break;
9604 case 1:
9605 idxGstRegBase = X86_GREG_xBX;
9606 idxGstRegIndex = X86_GREG_xDI;
9607 break;
9608 case 2:
9609 idxGstRegBase = X86_GREG_xBP;
9610 idxGstRegIndex = X86_GREG_xSI;
9611 break;
9612 case 3:
9613 idxGstRegBase = X86_GREG_xBP;
9614 idxGstRegIndex = X86_GREG_xDI;
9615 break;
9616 case 4:
9617 idxGstRegBase = X86_GREG_xSI;
9618 idxGstRegIndex = UINT8_MAX;
9619 break;
9620 case 5:
9621 idxGstRegBase = X86_GREG_xDI;
9622 idxGstRegIndex = UINT8_MAX;
9623 break;
9624 case 6:
9625 idxGstRegBase = X86_GREG_xBP;
9626 idxGstRegIndex = UINT8_MAX;
9627 break;
9628#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
9629 default:
9630#endif
9631 case 7:
9632 idxGstRegBase = X86_GREG_xBX;
9633 idxGstRegIndex = UINT8_MAX;
9634 break;
9635 }
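/* Worked example (added for illustration): bRm=0x42 decodes to mod=1, rm=2,
   so the displacement is the sign-extended low byte of u16Disp and the
   register pair is BP+SI; with u16Disp=0x0010 the switches above therefore
   yield u16EffAddr=0x0010, idxGstRegBase=xBP and idxGstRegIndex=xSI, i.e.
   the classic [bp+si+10h] operand. */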
9636
9637 /*
9638 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
9639 */
9640 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9641 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9642 kIemNativeGstRegUse_ReadOnly);
9643 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
9644 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9645 kIemNativeGstRegUse_ReadOnly)
9646 : UINT8_MAX;
9647#ifdef RT_ARCH_AMD64
9648 if (idxRegIndex == UINT8_MAX)
9649 {
9650 if (u16EffAddr == 0)
9651 {
9652 /* movzx ret, base */
9653 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
9654 }
9655 else
9656 {
9657 /* lea ret32, [base64 + disp32] */
9658 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9659 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9660 if (idxRegRet >= 8 || idxRegBase >= 8)
9661 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9662 pbCodeBuf[off++] = 0x8d;
9663 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9664 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
9665 else
9666 {
9667 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
9668 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9669 }
9670 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9671 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9672 pbCodeBuf[off++] = 0;
9673 pbCodeBuf[off++] = 0;
9674 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9675
9676 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9677 }
9678 }
9679 else
9680 {
9681 /* lea ret32, [index64 + base64 (+ disp32)] */
9682 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9683 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9684 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9685 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9686 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9687 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9688 pbCodeBuf[off++] = 0x8d;
9689 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
9690 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9691 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
9692 if (bMod == X86_MOD_MEM4)
9693 {
9694 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9695 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9696 pbCodeBuf[off++] = 0;
9697 pbCodeBuf[off++] = 0;
9698 }
9699 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9700 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9701 }
9702
9703#elif defined(RT_ARCH_ARM64)
9704 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9705 if (u16EffAddr == 0)
9706 {
9707 if (idxRegIndex == UINT8_MAX)
9708 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
9709 else
9710 {
9711 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
9712 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9713 }
9714 }
9715 else
9716 {
9717 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
9718 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
9719 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
9720 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9721 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
9722 else
9723 {
9724 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
9725 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9726 }
9727 if (idxRegIndex != UINT8_MAX)
9728 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
9729 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9730 }
9731
9732#else
9733# error "port me"
9734#endif
9735
9736 if (idxRegIndex != UINT8_MAX)
9737 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9738 iemNativeRegFreeTmp(pReNative, idxRegBase);
9739 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9740 return off;
9741}
9742
9743
9744#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
9745 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
9746
9747/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
9748 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
9749DECL_INLINE_THROW(uint32_t)
9750iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9751 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
9752{
9753 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9754
9755 /*
9756 * Handle the disp32 form with no registers first.
9757 *
9758 * Convert to an immediate value, as that'll delay the register allocation
9759 * and assignment till the memory access / call / whatever and we can use
9760 * a more appropriate register (or none at all).
9761 */
9762 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9763 {
9764 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
9765 return off;
9766 }
9767
9768 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
9769 uint32_t u32EffAddr = 0;
9770 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9771 {
9772 case 0: break;
9773 case 1: u32EffAddr = (int8_t)u32Disp; break;
9774 case 2: u32EffAddr = u32Disp; break;
9775 default: AssertFailed();
9776 }
9777
9778 /* Get the register (or SIB) value. */
9779 uint8_t idxGstRegBase = UINT8_MAX;
9780 uint8_t idxGstRegIndex = UINT8_MAX;
9781 uint8_t cShiftIndex = 0;
9782 switch (bRm & X86_MODRM_RM_MASK)
9783 {
9784 case 0: idxGstRegBase = X86_GREG_xAX; break;
9785 case 1: idxGstRegBase = X86_GREG_xCX; break;
9786 case 2: idxGstRegBase = X86_GREG_xDX; break;
9787 case 3: idxGstRegBase = X86_GREG_xBX; break;
9788 case 4: /* SIB */
9789 {
9790 /* index /w scaling. */
9791 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9792 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9793 {
9794 case 0: idxGstRegIndex = X86_GREG_xAX; break;
9795 case 1: idxGstRegIndex = X86_GREG_xCX; break;
9796 case 2: idxGstRegIndex = X86_GREG_xDX; break;
9797 case 3: idxGstRegIndex = X86_GREG_xBX; break;
9798 case 4: cShiftIndex = 0; /*no index*/ break;
9799 case 5: idxGstRegIndex = X86_GREG_xBP; break;
9800 case 6: idxGstRegIndex = X86_GREG_xSI; break;
9801 case 7: idxGstRegIndex = X86_GREG_xDI; break;
9802 }
9803
9804 /* base */
9805 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
9806 {
9807 case 0: idxGstRegBase = X86_GREG_xAX; break;
9808 case 1: idxGstRegBase = X86_GREG_xCX; break;
9809 case 2: idxGstRegBase = X86_GREG_xDX; break;
9810 case 3: idxGstRegBase = X86_GREG_xBX; break;
9811 case 4:
9812 idxGstRegBase = X86_GREG_xSP;
9813 u32EffAddr += uSibAndRspOffset >> 8;
9814 break;
9815 case 5:
9816 if ((bRm & X86_MODRM_MOD_MASK) != 0)
9817 idxGstRegBase = X86_GREG_xBP;
9818 else
9819 {
9820 Assert(u32EffAddr == 0);
9821 u32EffAddr = u32Disp;
9822 }
9823 break;
9824 case 6: idxGstRegBase = X86_GREG_xSI; break;
9825 case 7: idxGstRegBase = X86_GREG_xDI; break;
9826 }
9827 break;
9828 }
9829 case 5: idxGstRegBase = X86_GREG_xBP; break;
9830 case 6: idxGstRegBase = X86_GREG_xSI; break;
9831 case 7: idxGstRegBase = X86_GREG_xDI; break;
9832 }
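/* Worked example (added for illustration): bRm=0x04 selects the SIB path
   (mod=0, rm=4).  A SIB byte of 0x88 in the low bits of uSibAndRspOffset
   decodes to scale=2, index=1, base=0, i.e. cShiftIndex=2, idxGstRegIndex=xCX
   and idxGstRegBase=xAX, which is the operand [eax+ecx*4] with no
   displacement. */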
9833
9834 /*
9835 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9836 * the start of the function.
9837 */
9838 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9839 {
9840 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
9841 return off;
9842 }
9843
9844 /*
9845 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9846 */
9847 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9848 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9849 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9850 kIemNativeGstRegUse_ReadOnly);
9851 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9852 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9853 kIemNativeGstRegUse_ReadOnly);
9854
9855 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9856 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9857 {
9858 idxRegBase = idxRegIndex;
9859 idxRegIndex = UINT8_MAX;
9860 }
9861
9862#ifdef RT_ARCH_AMD64
9863 if (idxRegIndex == UINT8_MAX)
9864 {
9865 if (u32EffAddr == 0)
9866 {
9867 /* mov ret, base */
9868 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9869 }
9870 else
9871 {
9872 /* lea ret32, [base64 + disp32] */
9873 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9874 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9875 if (idxRegRet >= 8 || idxRegBase >= 8)
9876 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9877 pbCodeBuf[off++] = 0x8d;
9878 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9879 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9880 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9881 else
9882 {
9883 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9884 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9885 }
9886 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9887 if (bMod == X86_MOD_MEM4)
9888 {
9889 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9890 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9891 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9892 }
9893 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9894 }
9895 }
9896 else
9897 {
9898 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9899 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9900 if (idxRegBase == UINT8_MAX)
9901 {
9902 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
9903 if (idxRegRet >= 8 || idxRegIndex >= 8)
9904 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9905 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9906 pbCodeBuf[off++] = 0x8d;
9907 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9908 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9909 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9910 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9911 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9912 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9913 }
9914 else
9915 {
9916 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9917 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9918 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9919 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9920 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9921 pbCodeBuf[off++] = 0x8d;
9922 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9923 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9924 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9925 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9926 if (bMod != X86_MOD_MEM0)
9927 {
9928 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9929 if (bMod == X86_MOD_MEM4)
9930 {
9931 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9932 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9933 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9934 }
9935 }
9936 }
9937 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9938 }
9939
9940#elif defined(RT_ARCH_ARM64)
9941 if (u32EffAddr == 0)
9942 {
9943 if (idxRegIndex == UINT8_MAX)
9944 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9945 else if (idxRegBase == UINT8_MAX)
9946 {
9947 if (cShiftIndex == 0)
9948 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
9949 else
9950 {
9951 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9952 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
9953 }
9954 }
9955 else
9956 {
9957 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9958 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9959 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9960 }
9961 }
9962 else
9963 {
9964 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
9965 {
9966 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9967 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
9968 }
9969 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
9970 {
9971 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9972 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9973 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
9974 }
9975 else
9976 {
9977 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
9978 if (idxRegBase != UINT8_MAX)
9979 {
9980 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9981 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9982 }
9983 }
9984 if (idxRegIndex != UINT8_MAX)
9985 {
9986 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9987 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9988 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9989 }
9990 }
9991
9992#else
9993# error "port me"
9994#endif
9995
9996 if (idxRegIndex != UINT8_MAX)
9997 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9998 if (idxRegBase != UINT8_MAX)
9999 iemNativeRegFreeTmp(pReNative, idxRegBase);
10000 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10001 return off;
10002}
10003
10004
10005#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10006 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10007 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10008
10009#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10010 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10011 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10012
10013#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10014 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10015 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
10016
10017/**
10018 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
10019 *
10020 * @returns New off.
10021 * @param pReNative The native recompile state.
10022 * @param off The current code buffer offset.
10023 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
10024 * bit 4 to REX.X. The two bits are part of the
10025 * REG sub-field, which isn't needed in this
10026 * function.
10027 * @param uSibAndRspOffset Two parts:
10028 * - The first 8 bits make up the SIB byte.
10029 * - The next 8 bits are the fixed RSP/ESP offset
10030 * in case of a pop [xSP].
10031 * @param u32Disp The displacement byte/word/dword, if any.
10032 * @param cbInstr The size of the fully decoded instruction. Used
10033 * for RIP relative addressing.
10034 * @param idxVarRet The result variable number.
10035 * @param f64Bit Whether to use a 64-bit or 32-bit address size
10036 * when calculating the address.
10037 *
10038 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
10039 */
10040DECL_INLINE_THROW(uint32_t)
10041iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
10042 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
10043{
10044 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10045
10046 /*
10047 * Special case the rip + disp32 form first.
10048 */
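/* Note (added): mod=0 with rm=5 in 64-bit mode means RIP-relative addressing,
   so the result is the address of the *next* instruction plus the
   displacement, i.e. PC + cbInstr + (int32_t)u32Disp, which is what the code
   below emits. */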
10049 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10050 {
10051 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10052 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
10053 kIemNativeGstRegUse_ReadOnly);
10054#ifdef RT_ARCH_AMD64
10055 if (f64Bit)
10056 {
10057 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
10058 if ((int32_t)offFinalDisp == offFinalDisp)
10059 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
10060 else
10061 {
10062 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
10063 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
10064 }
10065 }
10066 else
10067 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
10068
10069#elif defined(RT_ARCH_ARM64)
10070 if (f64Bit)
10071 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10072 (int64_t)(int32_t)u32Disp + cbInstr);
10073 else
10074 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10075 (int32_t)u32Disp + cbInstr);
10076
10077#else
10078# error "Port me!"
10079#endif
10080 iemNativeRegFreeTmp(pReNative, idxRegPc);
10081 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10082 return off;
10083 }
10084
10085 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
10086 int64_t i64EffAddr = 0;
10087 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10088 {
10089 case 0: break;
10090 case 1: i64EffAddr = (int8_t)u32Disp; break;
10091 case 2: i64EffAddr = (int32_t)u32Disp; break;
10092 default: AssertFailed();
10093 }
10094
10095 /* Get the register (or SIB) value. */
10096 uint8_t idxGstRegBase = UINT8_MAX;
10097 uint8_t idxGstRegIndex = UINT8_MAX;
10098 uint8_t cShiftIndex = 0;
10099 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
10100 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
10101 else /* SIB: */
10102 {
10103 /* index /w scaling. */
10104 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10105 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10106 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
10107 if (idxGstRegIndex == 4)
10108 {
10109 /* no index */
10110 cShiftIndex = 0;
10111 idxGstRegIndex = UINT8_MAX;
10112 }
10113
10114 /* base */
10115 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
10116 if (idxGstRegBase == 4)
10117 {
10118 /* pop [rsp] hack */
10119 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
10120 }
10121 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
10122 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
10123 {
10124 /* mod=0 and base=5 -> disp32, no base reg. */
10125 Assert(i64EffAddr == 0);
10126 i64EffAddr = (int32_t)u32Disp;
10127 idxGstRegBase = UINT8_MAX;
10128 }
10129 }
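/* Illustration of the bRmEx packing (added): with REX.B=1 and ModRM rm=3 (and
   mod!=0), bit 3 is set and idxGstRegBase = bRmEx & 0xf = 11 = R11.  With a
   SIB byte, REX.X sits in bit 4 and is merged into the index above, e.g.
   REX.X=1 with SIB.index=2 gives idxGstRegIndex = 10 = R10. */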
10130
10131 /*
10132 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10133 * the start of the function.
10134 */
10135 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10136 {
10137 if (f64Bit)
10138 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
10139 else
10140 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
10141 return off;
10142 }
10143
10144 /*
10145 * Now emit code that calculates:
10146 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10147 * or if !f64Bit:
10148 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10149 */
10150 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10151 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10152 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10153 kIemNativeGstRegUse_ReadOnly);
10154 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10155 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10156 kIemNativeGstRegUse_ReadOnly);
10157
10158 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10159 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10160 {
10161 idxRegBase = idxRegIndex;
10162 idxRegIndex = UINT8_MAX;
10163 }
10164
10165#ifdef RT_ARCH_AMD64
10166 uint8_t bFinalAdj;
10167 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
10168 bFinalAdj = 0; /* likely */
10169 else
10170 {
10171 /* pop [rsp] with a problematic disp32 value. Split out the
10172 RSP offset and add it separately afterwards (bFinalAdj). */
10173 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
10174 Assert(idxGstRegBase == X86_GREG_xSP);
10175 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
10176 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
10177 Assert(bFinalAdj != 0);
10178 i64EffAddr -= bFinalAdj;
10179 Assert((int32_t)i64EffAddr == i64EffAddr);
10180 }
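/* Example (added): 'pop qword [rsp+7fffffffh]' carries an implicit +8 RSP
   adjustment, giving 0x80000007 which no longer fits a signed disp32; the lea
   below is therefore emitted with 0x7fffffff and the remaining 8 is added
   afterwards via bFinalAdj. */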
10181 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
10182//pReNative->pInstrBuf[off++] = 0xcc;
10183
10184 if (idxRegIndex == UINT8_MAX)
10185 {
10186 if (u32EffAddr == 0)
10187 {
10188 /* mov ret, base */
10189 if (f64Bit)
10190 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
10191 else
10192 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10193 }
10194 else
10195 {
10196 /* lea ret, [base + disp32] */
10197 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10198 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10199 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
10200 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10201 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10202 | (f64Bit ? X86_OP_REX_W : 0);
10203 pbCodeBuf[off++] = 0x8d;
10204 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10205 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10206 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10207 else
10208 {
10209 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10210 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10211 }
10212 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10213 if (bMod == X86_MOD_MEM4)
10214 {
10215 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10216 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10217 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10218 }
10219 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10220 }
10221 }
10222 else
10223 {
10224 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10225 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10226 if (idxRegBase == UINT8_MAX)
10227 {
10228 /* lea ret, [(index64 << cShiftIndex) + disp32] */
10229 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
10230 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10231 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10232 | (f64Bit ? X86_OP_REX_W : 0);
10233 pbCodeBuf[off++] = 0x8d;
10234 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10235 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10236 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10237 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10238 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10239 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10240 }
10241 else
10242 {
10243 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10244 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10245 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10246 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10247 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10248 | (f64Bit ? X86_OP_REX_W : 0);
10249 pbCodeBuf[off++] = 0x8d;
10250 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10251 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10252 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10253 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10254 if (bMod != X86_MOD_MEM0)
10255 {
10256 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10257 if (bMod == X86_MOD_MEM4)
10258 {
10259 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10260 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10261 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10262 }
10263 }
10264 }
10265 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10266 }
10267
10268 if (!bFinalAdj)
10269 { /* likely */ }
10270 else
10271 {
10272 Assert(f64Bit);
10273 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
10274 }
10275
10276#elif defined(RT_ARCH_ARM64)
10277 if (i64EffAddr == 0)
10278 {
10279 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10280 if (idxRegIndex == UINT8_MAX)
10281 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
10282 else if (idxRegBase != UINT8_MAX)
10283 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10284 f64Bit, false /*fSetFlags*/, cShiftIndex);
10285 else
10286 {
10287 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
10288 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
10289 }
10290 }
10291 else
10292 {
10293 if (f64Bit)
10294 { /* likely */ }
10295 else
10296 i64EffAddr = (int32_t)i64EffAddr;
10297
10298 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
10299 {
10300 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10301 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
10302 }
10303 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
10304 {
10305 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10306 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
10307 }
10308 else
10309 {
10310 if (f64Bit)
10311 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
10312 else
10313 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
10314 if (idxRegBase != UINT8_MAX)
10315 {
10316 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10317 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
10318 }
10319 }
10320 if (idxRegIndex != UINT8_MAX)
10321 {
10322 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10323 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10324 f64Bit, false /*fSetFlags*/, cShiftIndex);
10325 }
10326 }
10327
10328#else
10329# error "port me"
10330#endif
10331
10332 if (idxRegIndex != UINT8_MAX)
10333 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10334 if (idxRegBase != UINT8_MAX)
10335 iemNativeRegFreeTmp(pReNative, idxRegBase);
10336 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10337 return off;
10338}
10339
10340
10341/*********************************************************************************************************************************
10342* TLB Lookup. *
10343*********************************************************************************************************************************/
10344
10345/**
10346 * This must be instantiated *before* branching off to the lookup code,
10347 * so that register spilling and whatnot happens for everyone.
10348 */
10349typedef struct IEMNATIVEEMITTLBSTATE
10350{
10351 bool const fSkip;
10352 uint8_t const idxRegPtrHlp; /**< We don't support immediate variables with register assignment, so this is a tmp reg alloc. */
10353 uint8_t const idxRegPtr;
10354 uint8_t const idxRegSegBase;
10355 uint8_t const idxRegSegLimit;
10356 uint8_t const idxRegSegAttrib;
10357 uint8_t const idxReg1;
10358 uint8_t const idxReg2;
10359#if defined(RT_ARCH_ARM64)
10360 uint8_t const idxReg3;
10361#endif
10362 uint64_t const uAbsPtr;
10363
10364 IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, uint32_t *a_poff, uint8_t a_idxVarGCPtrMem,
10365 uint8_t a_iSegReg, uint8_t a_cbMem, uint8_t a_offDisp = 0)
10366#ifdef IEMNATIVE_WITH_TLB_LOOKUP
10367 /* 32-bit and 64-bit wraparound will require special handling, so skip these for absolute addresses. */
10368 : fSkip( a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10369 && ( (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT
10370 ? (uint64_t)(UINT32_MAX - a_cbMem - a_offDisp)
10371 : (uint64_t)(UINT64_MAX - a_cbMem - a_offDisp))
10372 < a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue)
10373#else
10374 : fSkip(true)
10375#endif
10376#if defined(RT_ARCH_AMD64) /* got good immediate encoding, otherwise we just load the address in a reg immediately. */
10377 , idxRegPtrHlp(UINT8_MAX)
10378#else
10379 , idxRegPtrHlp( a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate
10380 || fSkip
10381 ? UINT8_MAX
10382 : iemNativeRegAllocTmpImm(a_pReNative, a_poff, a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue) )
10383#endif
10384 , idxRegPtr(a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate && !fSkip
10385 ? iemNativeVarRegisterAcquire(a_pReNative, a_idxVarGCPtrMem, a_poff,
10386 true /*fInitialized*/, IEMNATIVE_CALL_ARG2_GREG)
10387 : idxRegPtrHlp)
10388 , idxRegSegBase(a_iSegReg == UINT8_MAX || fSkip
10389 ? UINT8_MAX
10390 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(a_iSegReg)))
10391 , idxRegSegLimit((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
10392 ? UINT8_MAX
10393 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(a_iSegReg)))
10394 , idxRegSegAttrib((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
10395 ? UINT8_MAX
10396 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_ATTRIB(a_iSegReg)))
10397 , idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10398 , idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10399#if defined(RT_ARCH_ARM64)
10400 , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10401#endif
10402 , uAbsPtr( a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate || fSkip
10403 ? UINT64_MAX
10404 : a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue)
10405
10406 {
10407 RT_NOREF(a_cbMem, a_offDisp);
10408 }
10409
10410 /* Alternative constructor for PUSH and POP where we don't have a GCPtrMem
10411 variable, only a register derived from the guest RSP. */
10412 IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, uint8_t a_idxRegPtr, uint32_t *a_poff,
10413 uint8_t a_iSegReg, uint8_t a_cbMem)
10414#ifdef IEMNATIVE_WITH_TLB_LOOKUP
10415 : fSkip(false)
10416#else
10417 : fSkip(true)
10418#endif
10419 , idxRegPtrHlp(UINT8_MAX)
10420 , idxRegPtr(a_idxRegPtr)
10421 , idxRegSegBase(a_iSegReg == UINT8_MAX || fSkip
10422 ? UINT8_MAX
10423 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(a_iSegReg)))
10424 , idxRegSegLimit((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
10425 ? UINT8_MAX
10426 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(a_iSegReg)))
10427 , idxRegSegAttrib((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
10428 ? UINT8_MAX
10429 : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_ATTRIB(a_iSegReg)))
10430 , idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10431 , idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10432#if defined(RT_ARCH_ARM64)
10433 , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
10434#endif
10435 , uAbsPtr(UINT64_MAX)
10436
10437 {
10438 RT_NOREF_PV(a_cbMem);
10439 }
10440
10441 void freeRegsAndReleaseVars(PIEMRECOMPILERSTATE a_pReNative, uint8_t idxVarGCPtrMem = UINT8_MAX) const
10442 {
10443 if (idxRegPtr != UINT8_MAX)
10444 {
10445 if (idxRegPtrHlp == UINT8_MAX)
10446 {
10447 if (idxVarGCPtrMem != UINT8_MAX)
10448 iemNativeVarRegisterRelease(a_pReNative, idxVarGCPtrMem);
10449 }
10450 else
10451 {
10452 Assert(idxRegPtrHlp == idxRegPtr);
10453 iemNativeRegFreeTmpImm(a_pReNative, idxRegPtrHlp);
10454 }
10455 }
10456 else
10457 Assert(idxRegPtrHlp == UINT8_MAX);
10458 if (idxRegSegBase != UINT8_MAX)
10459 iemNativeRegFreeTmp(a_pReNative, idxRegSegBase);
10460 if (idxRegSegLimit != UINT8_MAX)
10461 {
10462 iemNativeRegFreeTmp(a_pReNative, idxRegSegLimit);
10463 iemNativeRegFreeTmp(a_pReNative, idxRegSegAttrib);
10464 }
10465 else
10466 Assert(idxRegSegAttrib == UINT8_MAX);
10467#if defined(RT_ARCH_ARM64)
10468 iemNativeRegFreeTmp(a_pReNative, idxReg3);
10469#endif
10470 iemNativeRegFreeTmp(a_pReNative, idxReg2);
10471 iemNativeRegFreeTmp(a_pReNative, idxReg1);
10472
10473 }
10474
10475 uint32_t getRegsNotToSave() const
10476 {
10477 if (!fSkip)
10478 return RT_BIT_32(idxReg1)
10479 | RT_BIT_32(idxReg2)
10480#if defined(RT_ARCH_ARM64)
10481 | RT_BIT_32(idxReg3)
10482#endif
10483 ;
10484 return 0;
10485 }
10486
10487 /** This is only for avoiding assertions. */
10488 uint32_t getActiveRegsWithShadows() const
10489 {
10490#ifdef VBOX_STRICT
10491 if (!fSkip)
10492 return RT_BIT_32(idxRegSegBase) | RT_BIT_32(idxRegSegLimit) | RT_BIT_32(idxRegSegAttrib);
10493#endif
10494 return 0;
10495 }
10496} IEMNATIVEEMITTLBSTATE;
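/* Rough usage sketch (added for orientation; the real call sites later in this
   file differ in detail and in error handling):
       IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
       ...
       off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
                                    fAccess, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
       ...
       TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
   The two labels and idxRegMemResult are assumed to have been set up by the
   caller; the TLB miss path falls back to the safe helper functions. */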
10497
10498
10499/**
10500 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
10501 */
10502DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
10503{
10504 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
10505 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
10506 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
10507 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
10508
10509 /* Do the lookup manually. */
10510 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
10511 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
10512 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
10513 if (RT_LIKELY(pTlbe->uTag == uTag))
10514 {
10515 /*
10516 * Check TLB page table level access flags.
10517 */
10518 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
10519 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
10520 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
10521 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
10522 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
10523 | IEMTLBE_F_PG_UNASSIGNED
10524 | IEMTLBE_F_PT_NO_ACCESSED
10525 | fNoWriteNoDirty | fNoUser);
10526 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
10527 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
10528 {
10529 /*
10530 * Return the address.
10531 */
10532 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
10533 if ((uintptr_t)pbAddr == uResult)
10534 return;
10535 RT_NOREF(cbMem);
10536 AssertFailed();
10537 }
10538 else
10539 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
10540 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
10541 }
10542 else
10543 AssertFailed();
10544 RT_BREAKPOINT();
10545}
10546DECLASM(void) iemNativeHlpAsmSafeWrapCheckTlbLookup(void);
10547
10548
10549#ifdef IEMNATIVE_WITH_TLB_LOOKUP
10550DECL_INLINE_THROW(uint32_t)
10551iemNativeEmitTlbLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEEMITTLBSTATE const * const pTlbState,
10552 uint8_t iSegReg, uint8_t cbMem, uint8_t fAlignMask, uint32_t fAccess,
10553 uint32_t idxLabelTlbLookup, uint32_t idxLabelTlbMiss, uint8_t idxRegMemResult,
10554 uint8_t offDisp = 0)
10555{
10556 Assert(!pTlbState->fSkip);
10557# if defined(RT_ARCH_AMD64)
10558 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 512);
10559# elif defined(RT_ARCH_ARM64)
10560 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
10561# endif
10562
10563 /*
10564 * The expand down check isn't used all that much, so we emit it here to keep
10565 * the lookup code straighter.
10566 */
10567 /* check_expand_down: ; complicated! */
10568 uint32_t const offCheckExpandDown = off;
10569 uint32_t offFixupLimitDone = 0;
10570 if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
10571 {
10572off = iemNativeEmitBrkEx(pCodeBuf, off, 1); /** @todo this needs testing */
10573 /* cmp seglim, regptr */
10574 if (pTlbState->idxRegPtr != UINT8_MAX && offDisp == 0)
10575 off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, pTlbState->idxRegPtr);
10576 else if (pTlbState->idxRegPtr == UINT8_MAX)
10577 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
10578 (uint32_t)(pTlbState->uAbsPtr + offDisp));
10579 else if (cbMem == 1)
10580 off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, pTlbState->idxReg2);
10581 else
10582 { /* use idxRegMemResult to calc the displaced address. */
10583 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxRegPtr, offDisp);
10584 off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, idxRegMemResult);
10585 }
10586 /* ja tlbmiss */
10587 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10588
10589 /* reg1 = segattr & X86DESCATTR_D (0x4000) */
10590 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib, X86DESCATTR_D);
10591 /* xor reg1, X86DESCATTR_D */
10592 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_D);
10593 /* shl reg1, 2 (16 - 14) */
10594 AssertCompile((X86DESCATTR_D << 2) == UINT32_C(0x10000));
10595 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, pTlbState->idxReg1, 2);
10596 /* dec reg1 (=> 0xffff if D=0; 0xffffffff if D=1) */
10597 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, 1);
10598 /* cmp reg1, reg2 (64-bit) / imm (32-bit) */
10599 if (pTlbState->idxRegPtr != UINT8_MAX)
10600 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1,
10601 cbMem > 1 || offDisp != 0 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
10602 else
10603 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1,
10604 (uint32_t)(pTlbState->uAbsPtr + offDisp + cbMem - 1)); /* fSkip=true on overflow. */
10605 /* jbe tlbmiss */
10606 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
10607 /* jmp limitdone */
10608 offFixupLimitDone = off;
10609 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off /* ASSUME short jump suffices */);
10610 }
10611
10612 /*
10613 * tlblookup:
10614 */
10615 iemNativeLabelDefine(pReNative, idxLabelTlbLookup, off);
10616# if defined(RT_ARCH_ARM64) && 0
10617 off = iemNativeEmitBrkEx(pCodeBuf, off, 0);
10618# endif
10619
10620 /*
10621 * 1. Segmentation.
10622 *
10623 * 1a. Check segment limit and attributes if non-flat 32-bit code. This is complicated.
10624 */
10625 if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
10626 {
10627 /* Check that we've got a segment loaded and that it allows the access.
10628 For write access this means a writable data segment.
10629 For read-only accesses this means a readable code segment or any data segment. */
10630 if (fAccess & IEM_ACCESS_TYPE_WRITE)
10631 {
10632 uint32_t const fMustBe1 = X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_WRITE;
10633 uint32_t const fMustBe0 = X86DESCATTR_UNUSABLE | X86_SEL_TYPE_CODE;
10634 /* reg1 = segattrs & (must1|must0) */
10635 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,
10636 pTlbState->idxRegSegAttrib, fMustBe1 | fMustBe0);
10637 /* cmp reg1, must1 */
10638 AssertCompile(fMustBe1 <= UINT16_MAX);
10639 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, fMustBe1);
10640 /* jne tlbmiss */
10641 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10642 }
10643 else
10644 {
10645 /* U | !P |!DT |!CD | RW |
10646 16 | 8 | 4 | 3 | 1 |
10647 -------------------------------
10648 0 | 0 | 0 | 0 | 0 | execute-only code segment. - must be excluded
10649 0 | 0 | 0 | 0 | 1 | execute-read code segment.
10650 0 | 0 | 0 | 1 | 0 | read-only data segment.
10651 0 | 0 | 0 | 1 | 1 | read-write data segment. - last valid combination
10652 */
10653 /* reg1 = segattrs & (relevant attributes) */
10654 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib,
10655 X86DESCATTR_UNUSABLE | X86DESCATTR_P | X86DESCATTR_DT
10656 | X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE);
10657 /* xor reg1, X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE ; place C=1 RW=0 at the bottom & limit the range.
10658 ; EO-code=0, ER-code=2, RO-data=8, RW-data=10 */
10659#ifdef RT_ARCH_ARM64
10660 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_DT | X86_SEL_TYPE_CODE);
10661 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_P);
10662#else
10663 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1,
10664 X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE);
10665#endif
10666 /* sub reg1, X86_SEL_TYPE_WRITE ; EO-code=-2, ER-code=0, RO-data=6, RW-data=8 */
10667 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_WRITE /* ER-code */);
10668 /* cmp reg1, X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE */
10669 AssertCompile(X86_SEL_TYPE_CODE == 8);
10670 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_CODE);
10671 /* ja tlbmiss */
10672 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10673 }
10674
10675 /* If we're accessing more than one byte or if we're working with a non-zero offDisp,
10676 put the last address we'll be accessing in idxReg2 (64-bit). */
10677 if ((cbMem > 1 || offDisp != 0) && pTlbState->idxRegPtr != UINT8_MAX)
10678 {
10679 if (!offDisp)
10680 /* reg2 = regptr + cbMem - 1; 64-bit result so we can fend of wraparounds/overflows. */
10681 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, pTlbState->idxReg2,/*=*/ pTlbState->idxRegPtr,/*+*/ cbMem - 1);
10682 else
10683 {
10684 /* reg2 = (uint32_t)(regptr + offDisp) + cbMem - 1;. */
10685 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off,
10686 pTlbState->idxReg2,/*=*/ pTlbState->idxRegPtr,/*+*/ + offDisp);
10687 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, pTlbState->idxReg2, cbMem - 1);
10688 }
10689 }
10690
10691 /*
10692 * Check the limit. If this is a write access, we know that it's a
10693 * data segment and includes the expand_down bit. For read-only accesses
10694 * we need to check that code/data=0 and expanddown=1 before continuing.
10695 */
10696 if (fAccess & IEM_ACCESS_TYPE_WRITE)
10697 {
10698 /* test segattrs, X86_SEL_TYPE_DOWN */
10699 AssertCompile(X86_SEL_TYPE_DOWN < 128);
10700 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, pTlbState->idxRegSegAttrib, X86_SEL_TYPE_DOWN);
10701 /* jnz check_expand_down */
10702 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_ne);
10703 }
10704 else
10705 {
10706 /* reg1 = segattr & (code | down) */
10707 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,
10708 pTlbState->idxRegSegAttrib, X86_SEL_TYPE_CODE | X86_SEL_TYPE_DOWN);
10709 /* cmp reg1, down */
10710 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_DOWN);
10711 /* je check_expand_down */
10712 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_e);
10713 }
10714
10715 /* expand_up:
10716 cmp seglim, regptr/reg2/imm */
10717 if (pTlbState->idxRegPtr != UINT8_MAX)
10718 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
10719 cbMem > 1 || offDisp != 0 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
10720 else
10721 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
10722 (uint32_t)pTlbState->uAbsPtr + offDisp + cbMem - 1U); /* fSkip=true on overflow. */
10723 /* jbe tlbmiss */
10724 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
10725
10726 /* limitdone: */
10727 iemNativeFixupFixedJump(pReNative, offFixupLimitDone, off);
10728 }
10729
10730 /* 1b. Add the segment base. We use idxRegMemResult for the ptr register if
10731 this step is required or if the address is a constant (simplicity) or
10732 if offDisp is non-zero. */
10733 uint8_t const idxRegFlatPtr = iSegReg != UINT8_MAX || pTlbState->idxRegPtr == UINT8_MAX || offDisp != 0
10734 ? idxRegMemResult : pTlbState->idxRegPtr;
10735 if (iSegReg != UINT8_MAX)
10736 {
10737 Assert(idxRegFlatPtr != pTlbState->idxRegPtr);
10738 /* regflat = segbase + regptr/imm */
10739 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
10740 {
10741 Assert(iSegReg >= X86_SREG_FS);
10742 if (pTlbState->idxRegPtr != UINT8_MAX)
10743 {
10744 off = iemNativeEmitGprEqGprPlusGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->idxRegPtr);
10745 if (offDisp != 0)
10746 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxRegFlatPtr, offDisp);
10747 }
10748 else
10749 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase,
10750 pTlbState->uAbsPtr + offDisp);
10751 }
10752 else if (pTlbState->idxRegPtr != UINT8_MAX)
10753 {
10754 off = iemNativeEmitGpr32EqGprPlusGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->idxRegPtr);
10755 if (offDisp != 0)
10756 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegFlatPtr, offDisp);
10757 }
10758 else
10759 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr,
10760 pTlbState->idxRegSegBase, (uint32_t)pTlbState->uAbsPtr + offDisp);
10761 }
10762 else if (pTlbState->idxRegPtr == UINT8_MAX)
10763 {
10764 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
10765 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->uAbsPtr + offDisp);
10766 else
10767 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxRegFlatPtr, (uint32_t)pTlbState->uAbsPtr + offDisp);
10768 }
10769 else if (offDisp != 0)
10770 {
10771 Assert(idxRegFlatPtr != pTlbState->idxRegPtr);
10772 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
10773 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegPtr, offDisp);
10774 else
10775 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegPtr, offDisp);
10776 }
10777 else
10778 Assert(idxRegFlatPtr == pTlbState->idxRegPtr);
10779
10780 /*
10781 * 2. Check that the address doesn't cross a page boundary and doesn't have alignment issues.
10782 *
10783 * 2a. Alignment check using fAlignMask.
10784 */
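/* Example (added): a 4-byte access with fAlignMask=3 becomes a single
   'test regflat, 3'; any set bit means the access is misaligned and we take
   the TLB miss / fallback path instead. */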
10785 if (fAlignMask)
10786 {
10787 Assert(RT_IS_POWER_OF_TWO(fAlignMask + 1));
10788 Assert(fAlignMask < 128);
10789 /* test regflat, fAlignMask */
10790 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, fAlignMask);
10791 /* jnz tlbmiss */
10792 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10793 }
10794
10795 /*
10796 * 2b. Check that it's not crossing a page boundary. This is implicit in
10797 * the previous test if the alignment is same or larger than the type.
10798 */
10799 if (cbMem > fAlignMask + 1)
10800 {
10801 /* reg1 = regflat & 0xfff */
10802 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,/*=*/ idxRegFlatPtr,/*&*/ GUEST_PAGE_OFFSET_MASK);
10803 /* cmp reg1, GUEST_PAGE_SIZE - cbMem */
10804 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE - cbMem);
10805 /* ja tlbmiss */
10806 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10807 }
10808
10809 /*
10810 * 3. TLB lookup.
10811 *
10812 * 3a. Calculate the TLB tag value (IEMTLB_CALC_TAG).
10813 * In 64-bit mode we will also check for non-canonical addresses here.
10814 */
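/* How the AMD64 sequence below works (added note): rotating the flat address
   left by 16 brings bits 63:48 into the low word; 'inc word' turns the two
   canonical patterns 0xffff/0x0000 into 0x0000/0x0001, so any value above 1
   is non-canonical and branches to tlbmiss.  The subsequent
   'shr reg1, 16 + GUEST_PAGE_SHIFT' then leaves the page number bits of the
   original address (bits 47:12 for 4K pages), which form the TLB tag before
   OR'ing in uTlbRevision. */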
10815 if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
10816 {
10817# if defined(RT_ARCH_AMD64)
10818 /* mov reg1, regflat */
10819 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr);
10820 /* rol reg1, 16 */
10821 off = iemNativeEmitRotateGprLeftEx(pCodeBuf, off, pTlbState->idxReg1, 16);
10822 /** @todo Would 'movsx reg2, word reg1' and working on reg2 in dwords be faster? */
10823 /* inc word reg1 */
10824 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10825 if (pTlbState->idxReg1 >= 8)
10826 pCodeBuf[off++] = X86_OP_REX_B;
10827 pCodeBuf[off++] = 0xff;
10828 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, pTlbState->idxReg1 & 7);
10829 /* cmp word reg1, 1 */
10830 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10831 if (pTlbState->idxReg1 >= 8)
10832 pCodeBuf[off++] = X86_OP_REX_B;
10833 pCodeBuf[off++] = 0x83;
10834 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, pTlbState->idxReg1 & 7);
10835 pCodeBuf[off++] = 1;
10836 /* ja tlbmiss */
10837 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
10838 /* shr reg1, 16 + GUEST_PAGE_SHIFT */
10839 off = iemNativeEmitShiftGprRightEx(pCodeBuf, off, pTlbState->idxReg1, 16 + GUEST_PAGE_SHIFT);
10840
10841# elif defined(RT_ARCH_ARM64)
10842 /* lsr reg1, regflat, #48 */
10843 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(pTlbState->idxReg1, idxRegFlatPtr, 48);
10844 /* add reg1, reg1, #1 */
10845 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(pTlbState->idxReg1, pTlbState->idxReg1, 1, false /*f64Bit*/);
10846 /* tst reg1, #0xfffe */
10847 Assert(Armv8A64ConvertImmRImmS2Mask32(14, 31) == 0xfffe);
10848 pCodeBuf[off++] = Armv8A64MkInstrTstImm(pTlbState->idxReg1, 14, 31, false /*f64Bit*/);
10849 /* b.ne tlbmiss */
10850 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10851
10852 /* ubfx reg1, regflat, #12, #36 */
10853 pCodeBuf[off++] = Armv8A64MkInstrUbfx(pTlbState->idxReg1, idxRegFlatPtr, GUEST_PAGE_SHIFT, 48 - GUEST_PAGE_SHIFT);
10854# else
10855# error "Port me"
10856# endif
10857 }
10858 else
10859 {
10860 /* reg1 = (uint32_t)(regflat >> 12) */
10861 off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr, GUEST_PAGE_SHIFT);
10862 }
10863 /* or reg1, [qword pVCpu->iem.s.DataTlb.uTlbRevision] */
10864# if defined(RT_ARCH_AMD64)
10865 pCodeBuf[off++] = pTlbState->idxReg1 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
10866 pCodeBuf[off++] = 0x0b; /* OR r64,r/m64 */
10867 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, pTlbState->idxReg1, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbRevision));
10868# else
10869 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbRevision));
10870 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
10871# endif
10872
10873 /*
10874 * 3b. Calc pTlbe.
10875 */
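/* In effect (added note): pTlbe = &pVCpu->iem.s.DataTlb.aEntries[(uint8_t)uTag],
   where sizeof(IEMTLBENTRY) == 32 explains the 8-bit index extraction and the
   shift/scale by 5 below. */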
10876# if defined(RT_ARCH_AMD64)
10877 /* movzx reg2, byte reg1 */
10878 off = iemNativeEmitLoadGprFromGpr8Ex(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxReg1);
10879 /* shl reg2, 5 ; reg2 *= sizeof(IEMTLBENTRY) */
10880 AssertCompileSize(IEMTLBENTRY, 32);
10881 off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, pTlbState->idxReg2, 5);
10882 /* lea reg2, [pVCpu->iem.s.DataTlb.aEntries + reg2] */
10883 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU < 8);
10884 pCodeBuf[off++] = pTlbState->idxReg2 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_X | X86_OP_REX_R;
10885 pCodeBuf[off++] = 0x8d;
10886 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, pTlbState->idxReg2 & 7, 4 /*SIB*/);
10887 pCodeBuf[off++] = X86_SIB_MAKE(IEMNATIVE_REG_FIXED_PVMCPU & 7, pTlbState->idxReg2 & 7, 0);
10888 pCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10889 pCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10890 pCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10891 pCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries));
10892
10893# elif defined(RT_ARCH_ARM64)
10894 /* reg2 = (reg1 & 0xff) << 5 */
10895 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(pTlbState->idxReg2, pTlbState->idxReg1, 5, 8);
10896 /* reg2 += offsetof(VMCPUCC, iem.s.DataTlb.aEntries) */
10897 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, pTlbState->idxReg2, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries),
10898 pTlbState->idxReg3 /*iGprTmp*/);
10899 /* reg2 += pVCpu */
10900 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, pTlbState->idxReg2, IEMNATIVE_REG_FIXED_PVMCPU);
10901# else
10902# error "Port me"
10903# endif
10904
10905 /*
10906 * 3c. Compare the TLBE.uTag with the one from 2a (reg1).
10907 */
10908# if defined(RT_ARCH_AMD64)
10909 /* cmp reg1, [reg2] */
10910 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
10911 pCodeBuf[off++] = 0x3b;
10912 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
10913# elif defined(RT_ARCH_ARM64)
10914 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
10915 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
10916# else
10917# error "Port me"
10918# endif
10919 /* jne tlbmiss */
10920 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10921
10922 /*
10923 * 4. Check TLB page table level access flags and physical page revision #.
10924 */
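/* What this expresses (added note): a hit requires that, of the bits in the
   mask built below (phys rev, no-mapping, unassigned, not-accessed, plus the
   user/write/dirty restrictions that apply to this access), only the physical
   revision bits are set in the TLB entry and that they equal
   pVCpu->iem.s.DataTlb.uTlbPhysRev, the same invariant checked in C by
   iemNativeHlpCheckTlbLookup() above. */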
10925 /* mov reg1, mask */
10926 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
10927 uint64_t const fNoUser = (((pReNative->fExec >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK) + 1) & IEMTLBE_F_PT_NO_USER;
10928 uint64_t fTlbe = IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3 | IEMTLBE_F_PG_UNASSIGNED | IEMTLBE_F_PT_NO_ACCESSED
10929 | fNoUser;
10930 if (fAccess & IEM_ACCESS_TYPE_READ)
10931 fTlbe |= IEMTLBE_F_PG_NO_READ;
10932 if (fAccess & IEM_ACCESS_TYPE_WRITE)
10933 fTlbe |= IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PG_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY;
10934 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1, fTlbe);
10935# if defined(RT_ARCH_AMD64)
10936 /* and reg1, [reg2->fFlagsAndPhysRev] */
10937 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
10938 pCodeBuf[off++] = 0x23;
10939 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1,
10940 pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
10941
10942 /* cmp reg1, [pVCpu->iem.s.DataTlb.uTlbPhysRev] */
10943 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R);
10944 pCodeBuf[off++] = 0x3b;
10945 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, IEMNATIVE_REG_FIXED_PVMCPU,
10946 RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbPhysRev));
10947# elif defined(RT_ARCH_ARM64)
10948 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2,
10949 RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
10950 pCodeBuf[off++] = Armv8A64MkInstrAnd(pTlbState->idxReg1, pTlbState->idxReg1, pTlbState->idxReg3);
10951 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbPhysRev));
10952 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
10953# else
10954# error "Port me"
10955# endif
10956 /* jne tlbmiss */
10957 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
10958
10959 /*
10960 * 5. Check that pbMappingR3 isn't NULL (paranoia) and calculate the
10961 * resulting pointer.
10962 */
10963 /* mov reg1, [reg2->pbMappingR3] */
10964 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
10965 RT_UOFFSETOF(IEMTLBENTRY, pbMappingR3));
10966 /* if (!reg1) goto tlbmiss; */
10967 /** @todo eliminate the need for this test? */
10968 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, pTlbState->idxReg1,
10969 true /*f64Bit*/, idxLabelTlbMiss);
10970
10971 if (idxRegFlatPtr == idxRegMemResult) /* See step 1b. */
10972 {
10973 /* and result, 0xfff */
10974 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
10975 }
10976 else
10977 {
10978 Assert(idxRegFlatPtr == pTlbState->idxRegPtr);
10979 /* result = regflat & 0xfff */
10980 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, idxRegMemResult, idxRegFlatPtr, GUEST_PAGE_OFFSET_MASK);
10981 }
10982 /* add result, reg1 */
10983 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg1);
10984
10985# if 0
10986 /*
10987 * To verify the result we call a helper function.
10988 *
10989 * It's like the state logging, so parameters are passed on the stack.
10990 * iemNativeHlpAsmSafeWrapCheckTlbLookup(pVCpu, result, addr, seg | (cbMem << 8) | (fAccess << 16))
10991 */
10992# ifdef RT_ARCH_AMD64
10993 /* push seg | (cbMem << 8) | (fAccess << 16) */
10994 pCodeBuf[off++] = 0x68;
10995 pCodeBuf[off++] = iSegReg;
10996 pCodeBuf[off++] = cbMem;
10997 pCodeBuf[off++] = RT_BYTE1(fAccess);
10998 pCodeBuf[off++] = RT_BYTE2(fAccess);
10999 /* push pTlbState->idxRegPtr / immediate address. */
11000 if (pTlbState->idxRegPtr != UINT8_MAX)
11001 {
11002 if (pTlbState->idxRegPtr >= 8)
11003 pCodeBuf[off++] = X86_OP_REX_B;
11004 pCodeBuf[off++] = 0x50 + (pTlbState->idxRegPtr & 7);
11005 }
11006 else
11007 {
11008 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->uAbsPtr);
11009 if (pTlbState->idxReg1 >= 8)
11010 pCodeBuf[off++] = X86_OP_REX_B;
11011 pCodeBuf[off++] = 0x50 + (pTlbState->idxReg1 & 7);
11012 }
11013 /* push idxRegMemResult */
11014 if (idxRegMemResult >= 8)
11015 pCodeBuf[off++] = X86_OP_REX_B;
11016 pCodeBuf[off++] = 0x50 + (idxRegMemResult & 7);
11017 /* push pVCpu */
11018 pCodeBuf[off++] = 0x50 + IEMNATIVE_REG_FIXED_PVMCPU;
11019 /* mov reg1, helper */
11020 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1, (uintptr_t)iemNativeHlpAsmSafeWrapCheckTlbLookup);
11021 /* call [reg1] */
11022 pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_B);
11023 pCodeBuf[off++] = 0xff;
11024 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, pTlbState->idxReg1 & 7);
11025        /* The stack is cleaned up by the helper function. */
11026
11027# else
11028# error "Port me"
11029# endif
11030# endif
11031
11032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11033
11034 return off;
11035}
11036#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
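
/*
 * Illustrative note: the block below is a plain C sketch (kept under #if 0, never
 * compiled) of what the code emitted by the TLB lookup above does at runtime.  The
 * function and parameter names are invented for illustration, and it assumes a
 * 256-entry data TLB with 32-byte IEMTLBENTRY structures, as the 8-bit ubfiz and
 * shift-by-5 indexing above imply.
 */
#if 0
DECLINLINE(uint8_t *) iemNativeTlbLookupSketch(PVMCPUCC pVCpu, uint64_t uTag, uint64_t fTlbeMask, uint32_t offInPage)
{
    /* 3b+3c: Index the data TLB with the low 8 bits of the tag and compare tags. */
    IEMTLBENTRY *pTlbe = &pVCpu->iem.s.DataTlb.aEntries[uTag & 0xff];
    if (pTlbe->uTag != uTag)
        return NULL;                                            /* -> TlbMiss */

    /* 4: The masked access/status flags must be zero and the physical revision current,
          i.e. (fFlagsAndPhysRev & mask) must equal uTlbPhysRev exactly. */
    if ((pTlbe->fFlagsAndPhysRev & fTlbeMask) != pVCpu->iem.s.DataTlb.uTlbPhysRev)
        return NULL;                                            /* -> TlbMiss */

    /* 5: Require a ring-3 mapping and add back the offset into the page. */
    if (!pTlbe->pbMappingR3)
        return NULL;                                            /* -> TlbMiss */
    return pTlbe->pbMappingR3 + offInPage;
}
#endif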
11037
11038
11039/*********************************************************************************************************************************
11040* Memory fetches and stores common *
11041*********************************************************************************************************************************/
11042
11043typedef enum IEMNATIVEMITMEMOP
11044{
11045 kIemNativeEmitMemOp_Store = 0,
11046 kIemNativeEmitMemOp_Fetch,
11047 kIemNativeEmitMemOp_Fetch_Zx_U16,
11048 kIemNativeEmitMemOp_Fetch_Zx_U32,
11049 kIemNativeEmitMemOp_Fetch_Zx_U64,
11050 kIemNativeEmitMemOp_Fetch_Sx_U16,
11051 kIemNativeEmitMemOp_Fetch_Sx_U32,
11052 kIemNativeEmitMemOp_Fetch_Sx_U64
11053} IEMNATIVEMITMEMOP;
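
/* Illustrative note: the _Zx_/_Sx_ suffixes name how the fetched value is widened into
   the destination variable.  With cbMem == 2, for instance,
   kIemNativeEmitMemOp_Fetch_Sx_U64 corresponds to the C-level operation
        uint64_t uDst = (uint64_t)(int64_t)(int16_t)uMem16;    // movsx-style widening
   whereas kIemNativeEmitMemOp_Fetch_Zx_U64 is the plain unsigned, movzx-style widening. */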
11054
11055/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
11056 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
11057 * (with iSegReg = UINT8_MAX). */
11058DECL_INLINE_THROW(uint32_t)
11059iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
11060 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
11061 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
11062{
11063 /*
11064 * Assert sanity.
11065 */
11066 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11067 Assert( enmOp != kIemNativeEmitMemOp_Store
11068 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
11069 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
11070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11071 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
11072 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
11073 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11074 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11075 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
11076 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11077#ifdef VBOX_STRICT
11078 if (iSegReg == UINT8_MAX)
11079 {
11080 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11081 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11082 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11083 switch (cbMem)
11084 {
11085 case 1:
11086 Assert( pfnFunction
11087 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
11088 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11089 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11090 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11091 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11092 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
11093 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
11094 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
11095 : UINT64_C(0xc000b000a0009000) ));
11096 break;
11097 case 2:
11098 Assert( pfnFunction
11099 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
11100 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11101 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11102 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11103 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
11104 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
11105 : UINT64_C(0xc000b000a0009000) ));
11106 break;
11107 case 4:
11108 Assert( pfnFunction
11109 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
11110 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11111 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11112 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
11113 : UINT64_C(0xc000b000a0009000) ));
11114 break;
11115 case 8:
11116 Assert( pfnFunction
11117 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
11118 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
11119 : UINT64_C(0xc000b000a0009000) ));
11120 break;
11121 }
11122 }
11123 else
11124 {
11125 Assert(iSegReg < 6);
11126 switch (cbMem)
11127 {
11128 case 1:
11129 Assert( pfnFunction
11130 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
11131 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11132 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11133 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11134 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11135 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11136 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11137 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11138 : UINT64_C(0xc000b000a0009000) ));
11139 break;
11140 case 2:
11141 Assert( pfnFunction
11142 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11143 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11144 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11145 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11146 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11147 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11148 : UINT64_C(0xc000b000a0009000) ));
11149 break;
11150 case 4:
11151 Assert( pfnFunction
11152 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11153 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11154 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11155 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11156 : UINT64_C(0xc000b000a0009000) ));
11157 break;
11158 case 8:
11159 Assert( pfnFunction
11160 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11161 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11162 : UINT64_C(0xc000b000a0009000) ));
11163 break;
11164 }
11165 }
11166#endif
11167
11168#ifdef VBOX_STRICT
11169 /*
11170 * Check that the fExec flags we've got make sense.
11171 */
11172 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11173#endif
11174
11175 /*
11176 * To keep things simple we have to commit any pending writes first as we
11177 * may end up making calls.
11178 */
11179 /** @todo we could postpone this till we make the call and reload the
11180 * registers after returning from the call. Not sure if that's sensible or
11181 * not, though. */
11182 off = iemNativeRegFlushPendingWrites(pReNative, off);
11183
11184#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11185 /*
11186 * Move/spill/flush stuff out of call-volatile registers.
11187 * This is the easy way out. We could contain this to the tlb-miss branch
11188 * by saving and restoring active stuff here.
11189 */
11190 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11191#endif
11192
11193 /*
11194 * Define labels and allocate the result register (trying for the return
11195 * register if we can).
11196 */
11197 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11198 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11199 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11200 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11201 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11202 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11203 uint8_t const idxRegValueStore = !TlbState.fSkip
11204 && enmOp == kIemNativeEmitMemOp_Store
11205 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11206 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11207 : UINT8_MAX;
11208 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11209 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11210 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11211 : UINT32_MAX;
11212
11213 /*
11214 * Jump to the TLB lookup code.
11215 */
11216 if (!TlbState.fSkip)
11217 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11218
11219 /*
11220 * TlbMiss:
11221 *
11222 * Call helper to do the fetching.
11223 * We flush all guest register shadow copies here.
11224 */
11225 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11226
11227#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11228 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11229#else
11230 RT_NOREF(idxInstr);
11231#endif
11232
11233#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11234 /* Save variables in volatile registers. */
11235 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11236 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11237 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
11238 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11239#endif
11240
11241 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
11242 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
11243 if (enmOp == kIemNativeEmitMemOp_Store)
11244 {
11245 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
11246 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
11247#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11248 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11249#else
11250                                                    IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInVolatileRegs*/);
11251 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
11252#endif
11253 }
11254
11255 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
11256 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
11257#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11258 fVolGregMask);
11259#else
11260                                                    fVolGregMask, true /*fSpilledVarsInVolatileRegs*/);
11261#endif
11262
11263 if (iSegReg != UINT8_MAX)
11264 {
11265 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
11266 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11267 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
11268 }
11269
11270 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11271 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11272
11273 /* Done setting up parameters, make the call. */
11274 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11275
11276 /*
11277 * Put the result in the right register if this is a fetch.
11278 */
11279 if (enmOp != kIemNativeEmitMemOp_Store)
11280 {
11281 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
11282 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
11283 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
11284 }
11285
11286#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11287 /* Restore variables and guest shadow registers to volatile registers. */
11288 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11289 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11290#endif
11291
11292#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11293 if (!TlbState.fSkip)
11294 {
11295 /* end of TlbMiss - Jump to the done label. */
11296 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11297 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11298
11299 /*
11300 * TlbLookup:
11301 */
11302 off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
11303 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
11304 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
11305
11306 /*
11307 * Emit code to do the actual storing / fetching.
11308 */
11309 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11310# ifdef VBOX_WITH_STATISTICS
11311 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11312 enmOp == kIemNativeEmitMemOp_Store
11313                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
11314                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
11315# endif
11316 switch (enmOp)
11317 {
11318 case kIemNativeEmitMemOp_Store:
11319 if (pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate)
11320 {
11321 switch (cbMem)
11322 {
11323 case 1:
11324 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11325 break;
11326 case 2:
11327 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11328 break;
11329 case 4:
11330 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11331 break;
11332 case 8:
11333 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11334 break;
11335 default:
11336 AssertFailed();
11337 }
11338 }
11339 else
11340 {
11341 switch (cbMem)
11342 {
11343 case 1:
11344 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off,
11345 (uint8_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11346 idxRegMemResult, TlbState.idxReg1);
11347 break;
11348 case 2:
11349 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11350 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11351 idxRegMemResult, TlbState.idxReg1);
11352 break;
11353 case 4:
11354 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11355 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11356 idxRegMemResult, TlbState.idxReg1);
11357 break;
11358 case 8:
11359 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11360 idxRegMemResult, TlbState.idxReg1);
11361 break;
11362 default:
11363 AssertFailed();
11364 }
11365 }
11366 break;
11367
11368 case kIemNativeEmitMemOp_Fetch:
11369 case kIemNativeEmitMemOp_Fetch_Zx_U16:
11370 case kIemNativeEmitMemOp_Fetch_Zx_U32:
11371 case kIemNativeEmitMemOp_Fetch_Zx_U64:
11372 switch (cbMem)
11373 {
11374 case 1:
11375 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11376 break;
11377 case 2:
11378 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11379 break;
11380 case 4:
11381 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11382 break;
11383 case 8:
11384 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11385 break;
11386 default:
11387 AssertFailed();
11388 }
11389 break;
11390
11391 case kIemNativeEmitMemOp_Fetch_Sx_U16:
11392 Assert(cbMem == 1);
11393 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11394 break;
11395
11396 case kIemNativeEmitMemOp_Fetch_Sx_U32:
11397 Assert(cbMem == 1 || cbMem == 2);
11398 if (cbMem == 1)
11399 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11400 else
11401 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11402 break;
11403
11404 case kIemNativeEmitMemOp_Fetch_Sx_U64:
11405 switch (cbMem)
11406 {
11407 case 1:
11408 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11409 break;
11410 case 2:
11411 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11412 break;
11413 case 4:
11414 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11415 break;
11416 default:
11417 AssertFailed();
11418 }
11419 break;
11420
11421 default:
11422 AssertFailed();
11423 }
11424
11425 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11426
11427 /*
11428 * TlbDone:
11429 */
11430 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11431
11432 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11433
11434# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11435 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
11436 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11437# endif
11438 }
11439#else
11440 RT_NOREF(fAlignMask, idxLabelTlbMiss);
11441#endif
11442
11443 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
11444 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11445 return off;
11446}
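
/*
 * Illustrative note: a rough sketch of the code the function above emits when the TLB
 * lookup isn't skipped (label names abbreviated, N being the TLB sequence number):
 *
 *          jmp     TlbLookup_N             ; straight to the inline lookup
 *      TlbMiss_N:
 *          ; save volatile variables, load pVCpu / GCPtrMem / (iSegReg) / (uValue)
 *          ; into the call argument registers, call pfnFunction, restore vars/shadows
 *          jmp     TlbDone_N
 *      TlbLookup_N:
 *          ; inline TLB probe (iemNativeEmitTlbLookup), any failure jumps to TlbMiss_N
 *          ; inline load/store through the host mapping in idxRegMemResult
 *      TlbDone_N:
 *          ; continue with the next recompiled statement
 */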
11447
11448
11449
11450/*********************************************************************************************************************************
11451* Memory fetches (IEM_MEM_FETCH_XXX). *
11452*********************************************************************************************************************************/
11453
11454/* 8-bit segmented: */
11455#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
11456 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
11457 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11458 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11459
11460#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11461 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11462 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11463 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11464
11465#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11466 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11467 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11468 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11469
11470#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11471 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11472 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11473 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11474
11475#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11476 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11477 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11478 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11479
11480#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11481 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11482 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11483 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11484
11485#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11486 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11487 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11488 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
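
/* Illustrative note (variable names invented): these wrappers are expanded where
   pReNative, off and pCallEntry are in scope, so a statement such as
        IEM_MC_FETCH_MEM_U8_SX_U64(u64Dst, X86_SREG_DS, GCPtrEffSrc);
   becomes a single iemNativeEmitMemFetchStoreDataCommon call with cbMem = sizeof(uint8_t),
   kIemNativeEmitMemOp_Fetch_Sx_U64 and iemNativeHlpMemFetchDataU8_Sx_U64 as the
   TLB-miss fallback. */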
11489
11490/* 16-bit segmented: */
11491#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11492 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11493 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11494 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11495
11496#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11497 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11498 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11499 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11500
11501#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11502 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11503 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11504 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11505
11506#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11507 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11508 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11509 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11510
11511#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11512 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11513 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11514 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11515
11516#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11517 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11518 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11519 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11520
11521
11522/* 32-bit segmented: */
11523#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11524 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11525 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11526 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11527
11528#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11529 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11530 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11531 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11532
11533#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11534 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11535 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11536 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11537
11538#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11539 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11540 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11541 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11542
11543
11544/* 64-bit segmented: */
11545#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11546 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11547 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11548 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
11549
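/* Illustrative note on the fAlignMask arguments (an assumption drawn from how the value
   is handed to iemNativeEmitTlbLookup): it is the natural-alignment mask of the access,
   e.g. sizeof(uint32_t) - 1 = 3 for dword fetches, so an address with (GCPtrMem & 3) != 0
   is expected to take the TLB-miss/safe-helper path instead of the inline access; byte
   accesses pass 0 and can never be misaligned. */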
11550
11551
11552/* 8-bit flat: */
11553#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
11554 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
11555 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11556 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11557
11558#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
11559 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11560 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11561 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11562
11563#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
11564 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11565 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11566 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11567
11568#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
11569 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11570 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11571 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11572
11573#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
11574 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11575 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11576 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11577
11578#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
11579 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11580 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11581 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11582
11583#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
11584 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11585 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11586 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11587
11588
11589/* 16-bit flat: */
11590#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
11591 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11592 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11593 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11594
11595#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
11596 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11597 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11598 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11599
11600#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
11601 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11602 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11603 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11604
11605#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
11606 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11607 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11608 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11609
11610#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
11611 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11612 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11613 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11614
11615#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
11616 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11617 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11618 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11619
11620/* 32-bit flat: */
11621#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
11622 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11623 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11624 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11625
11626#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
11627 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11628 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11629 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11630
11631#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
11632 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11633 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11634 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11635
11636#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
11637 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11638 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11639 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11640
11641/* 64-bit flat: */
11642#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
11643 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11644 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11645 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
11646
11647
11648
11649/*********************************************************************************************************************************
11650* Memory stores (IEM_MEM_STORE_XXX). *
11651*********************************************************************************************************************************/
11652
11653#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
11654 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
11655 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11656 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11657
11658#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
11659 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
11660 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11661 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11662
11663#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
11664 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
11665 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11666 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11667
11668#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
11669 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
11670 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11671 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11672
11673
11674#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
11675 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
11676 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11677 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11678
11679#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
11680 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
11681 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11682 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11683
11684#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
11685 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
11686 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11687 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11688
11689#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
11690 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
11691 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11692 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11693
11694
11695#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
11696 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11697 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11698
11699#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
11700 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11701 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11702
11703#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
11704 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11705 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11706
11707#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
11708 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11709 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11710
11711
11712#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
11713 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11714 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11715
11716#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
11717 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11718 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11719
11720#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
11721 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11722 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11723
11724#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
11725 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11726 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11727
11728/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
11729 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
11730DECL_INLINE_THROW(uint32_t)
11731iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
11732 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
11733{
11734 /*
11735 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
11736 * to do the grunt work.
11737 */
11738 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
11739 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
11740 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
11741 pfnFunction, idxInstr);
11742 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
11743 return off;
11744}
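
/* Illustrative note: a statement like IEM_MC_STORE_MEM_U16_CONST(X86_SREG_ES, GCPtrEffDst, 0xffff)
   thus allocates a temporary immediate variable holding 0xffff, routes it through
   iemNativeEmitMemFetchStoreDataCommon as an ordinary store (letting the TLB-hit path
   use an immediate store instruction), and frees the temporary again. */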
11745
11746
11747
11748/*********************************************************************************************************************************
11749* Stack Accesses. *
11750*********************************************************************************************************************************/
11751/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
11752#define IEM_MC_PUSH_U16(a_u16Value) \
11753 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11754 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
11755#define IEM_MC_PUSH_U32(a_u32Value) \
11756 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11757 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
11758#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
11759 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
11760 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
11761#define IEM_MC_PUSH_U64(a_u64Value) \
11762 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11763 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
11764
11765#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
11766 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11767 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11768#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
11769 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11770 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
11771#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
11772 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
11773 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
11774
11775#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
11776 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11777 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11778#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
11779 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11780 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
11781
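/* Illustrative note: decoding the RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) keys
   above, RT_MAKE_U32_FROM_U8(32, 32, 1, 0) for example means "32-bit segment-register
   value pushed on a flat 32-bit stack", while cBitsFlat == 0 selects the non-flat SS
   path with the 16/32-bit stack pointer handling in iemNativeEmitStackPush below. */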
11782
11783DECL_FORCE_INLINE_THROW(uint32_t)
11784iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11785{
11786 /* Use16BitSp: */
11787#ifdef RT_ARCH_AMD64
11788 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
11789 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11790#else
11791 /* sub regeff, regrsp, #cbMem */
11792 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
11793 /* and regeff, regeff, #0xffff */
11794 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
11795 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
11796    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
11797 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
11798#endif
11799 return off;
11800}
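
/* Illustrative note (an equivalent sketch only, names shortened): with a 16-bit SS the
   emitter above behaves like
        uint16_t uNewSp = (uint16_t)(uRsp - cbMem);             // e.g. 0x0000 - 2 -> 0xfffe
        uEffSp          = uNewSp;                               // zero-extended effective address
        uRsp            = (uRsp & ~(uint64_t)0xffff) | uNewSp;  // bits 63:16 untouched
   so only SP wraps, which is what the AMD64 16-bit sub and the ARM64 bfi implement. */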
11801
11802
11803DECL_FORCE_INLINE(uint32_t)
11804iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11805{
11806 /* Use32BitSp: */
11807 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
11808 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11809 return off;
11810}
11811
11812
11813/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
11814DECL_INLINE_THROW(uint32_t)
11815iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
11816 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11817{
11818 /*
11819 * Assert sanity.
11820 */
11821 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11822#ifdef VBOX_STRICT
11823 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11824 {
11825 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11826 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11827 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11828 Assert( pfnFunction
11829 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11830 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
11831 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
11832 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11833 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
11834 : UINT64_C(0xc000b000a0009000) ));
11835 }
11836 else
11837 Assert( pfnFunction
11838 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
11839 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
11840 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
11841 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
11842 : UINT64_C(0xc000b000a0009000) ));
11843#endif
11844
11845#ifdef VBOX_STRICT
11846 /*
11847 * Check that the fExec flags we've got make sense.
11848 */
11849 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11850#endif
11851
11852 /*
11853 * To keep things simple we have to commit any pending writes first as we
11854 * may end up making calls.
11855 */
11856 /** @todo we could postpone this till we make the call and reload the
11857 * registers after returning from the call. Not sure if that's sensible or
11858 * not, though. */
11859 off = iemNativeRegFlushPendingWrites(pReNative, off);
11860
11861 /*
11862 * First we calculate the new RSP and the effective stack pointer value.
11863 * For 64-bit mode and flat 32-bit these two are the same.
11864     * (Code structure is very similar to that of POP)
11865 */
11866 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
11867 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
11868 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
11869 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
11870 ? cbMem : sizeof(uint16_t);
11871 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
11872 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
11873 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11874 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
11875 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
11876 if (cBitsFlat != 0)
11877 {
11878 Assert(idxRegEffSp == idxRegRsp);
11879 Assert(cBitsFlat == 32 || cBitsFlat == 64);
11880 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
11881 if (cBitsFlat == 64)
11882 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
11883 else
11884 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
11885 }
11886 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
11887 {
11888 Assert(idxRegEffSp != idxRegRsp);
11889 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
11890 kIemNativeGstRegUse_ReadOnly);
11891#ifdef RT_ARCH_AMD64
11892 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11893#else
11894 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11895#endif
11896 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
11897 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
11898 offFixupJumpToUseOtherBitSp = off;
11899 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11900 {
11901 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
11902 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11903 }
11904 else
11905 {
11906 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
11907 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11908 }
11909 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11910 }
11911 /* SpUpdateEnd: */
11912 uint32_t const offLabelSpUpdateEnd = off;
11913
11914 /*
11915 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
11916 * we're skipping lookup).
11917 */
11918 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
11919 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
11920 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11921 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11922 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11923 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11924 : UINT32_MAX;
11925 uint8_t const idxRegValue = !TlbState.fSkip
11926 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11927 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
11928 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
11929 : UINT8_MAX;
11930 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11931
11932
11933 if (!TlbState.fSkip)
11934 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11935 else
11936 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
11937
11938 /*
11939 * Use16BitSp:
11940 */
11941 if (cBitsFlat == 0)
11942 {
11943#ifdef RT_ARCH_AMD64
11944 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11945#else
11946 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11947#endif
11948 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
11949 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11950 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11951 else
11952 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11953 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
11954 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11955 }
11956
11957 /*
11958 * TlbMiss:
11959 *
11960 * Call helper to do the pushing.
11961 */
11962 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11963
11964#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11965 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11966#else
11967 RT_NOREF(idxInstr);
11968#endif
11969
11970 /* Save variables in volatile registers. */
11971 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11972 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
11973 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
11974 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
11975 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11976
11977 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
11978 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
11979 {
11980 /* Swap them using ARG0 as temp register: */
11981 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
11982 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
11983 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
11984 }
11985 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
11986 {
11987 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
11988 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
11989 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11990
11991 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
11992 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
11993 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11994 }
11995 else
11996 {
11997 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
11998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11999
12000 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
12001 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
12002 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
12003 }
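    /* Note: the three-way shuffle above exists because the two outgoing arguments may
       already occupy each other's registers; if the value sits in ARG1 and the effective
       SP in ARG2, loading either one directly would clobber the other, hence the swap
       via ARG0 as scratch. */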
12004
12005 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12006 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12007
12008 /* Done setting up parameters, make the call. */
12009 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12010
12011 /* Restore variables and guest shadow registers to volatile registers. */
12012 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12013 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12014
12015#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12016 if (!TlbState.fSkip)
12017 {
12018 /* end of TlbMiss - Jump to the done label. */
12019 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12020 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12021
12022 /*
12023 * TlbLookup:
12024 */
12025 off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1, IEM_ACCESS_TYPE_WRITE,
12026 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12027
12028 /*
12029 * Emit code to do the actual storing / fetching.
12030 */
12031 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12032# ifdef VBOX_WITH_STATISTICS
12033 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12034 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12035# endif
12036 if (idxRegValue != UINT8_MAX)
12037 {
12038 switch (cbMemAccess)
12039 {
12040 case 2:
12041 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12042 break;
12043 case 4:
12044 if (!fIsIntelSeg)
12045 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12046 else
12047 {
12048                        /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
12049                           PUSH FS in real mode, so we have to try to emulate that here.
12050 We borrow the now unused idxReg1 from the TLB lookup code here. */
12051 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
12052 kIemNativeGstReg_EFlags);
12053 if (idxRegEfl != UINT8_MAX)
12054 {
12055#ifdef RT_ARCH_AMD64
12056 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
12057 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12058 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12059#else
12060 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
12061 off, TlbState.idxReg1, idxRegEfl,
12062 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12063#endif
12064 iemNativeRegFreeTmp(pReNative, idxRegEfl);
12065 }
12066 else
12067 {
12068 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
12069 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
12070 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12071 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12072 }
12073 /* ASSUMES the upper half of idxRegValue is ZERO. */
12074 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
12075 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
12076 }
12077 break;
12078 case 8:
12079 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12080 break;
12081 default:
12082 AssertFailed();
12083 }
12084 }
12085 else
12086 {
12087 switch (cbMemAccess)
12088 {
12089 case 2:
12090 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
12091 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
12092 idxRegMemResult, TlbState.idxReg1);
12093 break;
12094 case 4:
12095 Assert(!fIsSegReg);
12096 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
12097 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
12098 idxRegMemResult, TlbState.idxReg1);
12099 break;
12100 case 8:
12101 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
12102 idxRegMemResult, TlbState.idxReg1);
12103 break;
12104 default:
12105 AssertFailed();
12106 }
12107 }
12108
12109 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12110 TlbState.freeRegsAndReleaseVars(pReNative);
12111
12112 /*
12113 * TlbDone:
12114 *
12115 * Commit the new RSP value.
12116 */
12117 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12118 }
12119#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12120
12121 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
12122 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12123 if (idxRegEffSp != idxRegRsp)
12124 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12125
12126    /* The value variable is implicitly flushed. */
12127 if (idxRegValue != UINT8_MAX)
12128 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12129 iemNativeVarFreeLocal(pReNative, idxVarValue);
12130
12131 return off;
12132}
12133
12134
12135
12136/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
12137#define IEM_MC_POP_GREG_U16(a_iGReg) \
12138 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12139 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12140#define IEM_MC_POP_GREG_U32(a_iGReg) \
12141 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12142 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12143#define IEM_MC_POP_GREG_U64(a_iGReg) \
12144 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12145 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12146
12147#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12148 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12149 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12150#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12151 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12152 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12153
12154#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12155 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12156 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12157#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12158 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12159 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12160
12161
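/**
 * Emits the Use16BitSp part of a stack pop: loads the 16-bit stack pointer
 * into @a idxRegEffSp for use as the effective address and advances SP by
 * @a cbMem, leaving RSP bits 63:16 untouched.
 *
 * @note @a idxRegTmp is only used as scratch on ARM64; it is unreferenced
 *       on AMD64.
 */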
12162DECL_FORCE_INLINE_THROW(uint32_t)
12163iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12164 uint8_t idxRegTmp)
12165{
12166 /* Use16BitSp: */
12167#ifdef RT_ARCH_AMD64
12168 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12169 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12170 RT_NOREF(idxRegTmp);
12171#else
12172 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12173 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12174 /* add tmp, regrsp, #cbMem */
12175 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12176 /* and tmp, tmp, #0xffff */
12177 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12178 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12179    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
12180 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12181#endif
12182 return off;
12183}
12184
12185
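/**
 * Emits the Use32BitSp part of a stack pop: copies ESP into @a idxRegEffSp
 * for use as the effective address and adds @a cbMem to the 32-bit stack
 * pointer.
 */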
12186DECL_FORCE_INLINE(uint32_t)
12187iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12188{
12189 /* Use32BitSp: */
12190 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12191 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12192 return off;
12193}
12194
12195
12196/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12197DECL_INLINE_THROW(uint32_t)
12198iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12199 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12200{
12201 /*
12202 * Assert sanity.
12203 */
12204 Assert(idxGReg < 16);
12205#ifdef VBOX_STRICT
12206 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12207 {
12208 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12209 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12210 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12211 Assert( pfnFunction
12212 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12213 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12214 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12215 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12216 : UINT64_C(0xc000b000a0009000) ));
12217 }
12218 else
12219 Assert( pfnFunction
12220 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12221 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12222 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12223 : UINT64_C(0xc000b000a0009000) ));
12224#endif
12225
12226#ifdef VBOX_STRICT
12227 /*
12228 * Check that the fExec flags we've got make sense.
12229 */
12230 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12231#endif
12232
12233 /*
12234 * To keep things simple we have to commit any pending writes first as we
12235 * may end up making calls.
12236 */
12237 off = iemNativeRegFlushPendingWrites(pReNative, off);
12238
12239 /*
12240 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
12241 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
12242 * directly as the effective stack pointer.
12243 * (Code structure is very similar to that of PUSH)
12244 */
12245 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12246 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12247 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12248 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12249 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12250 /** @todo can do a better job picking the register here. For cbMem >= 4 this
12251 * will be the resulting register value. */
12252 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
12253
12254 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12255 if (cBitsFlat != 0)
12256 {
12257 Assert(idxRegEffSp == idxRegRsp);
12258 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12259 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12260 }
12261 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12262 {
12263 Assert(idxRegEffSp != idxRegRsp);
12264 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12265 kIemNativeGstRegUse_ReadOnly);
12266#ifdef RT_ARCH_AMD64
12267 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12268#else
12269 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12270#endif
12271 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12272 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12273 offFixupJumpToUseOtherBitSp = off;
12274 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12275 {
12276/** @todo can skip idxRegRsp updating when popping ESP. */
12277 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12278 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12279 }
12280 else
12281 {
12282 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12283 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12284 }
12285 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12286 }
12287 /* SpUpdateEnd: */
12288 uint32_t const offLabelSpUpdateEnd = off;
12289
12290 /*
12291     * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
12292     * TlbMiss if we're skipping the lookup).
12293 */
12294 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12295 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
12296 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12297 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12298 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12299 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12300 : UINT32_MAX;
12301
12302 if (!TlbState.fSkip)
12303 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12304 else
12305 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12306
12307 /*
12308     * Use16BitSp / Use32BitSp (whichever was not emitted inline above):
12309 */
12310 if (cBitsFlat == 0)
12311 {
12312#ifdef RT_ARCH_AMD64
12313 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12314#else
12315 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12316#endif
12317 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12318 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12319 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12320 else
12321 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12322 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12323 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12324 }
12325
12326 /*
12327 * TlbMiss:
12328 *
12329     * Call helper to do the popping.
12330 */
12331 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12332
12333#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12334 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12335#else
12336 RT_NOREF(idxInstr);
12337#endif
12338
12339 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12340 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12341 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
12342 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12343
12344
12345 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
12346 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12348
12349 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12350 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12351
12352 /* Done setting up parameters, make the call. */
12353 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12354
12355 /* Move the return register content to idxRegMemResult. */
12356 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12357 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12358
12359 /* Restore variables and guest shadow registers to volatile registers. */
12360 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12361 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12362
12363#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12364 if (!TlbState.fSkip)
12365 {
12366 /* end of TlbMiss - Jump to the done label. */
12367 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12368 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12369
12370 /*
12371 * TlbLookup:
12372 */
12373 off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
12374 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12375
12376 /*
12377         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
12378 */
12379 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12380# ifdef VBOX_WITH_STATISTICS
12381 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12382 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12383# endif
12384 switch (cbMem)
12385 {
12386 case 2:
12387 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12388 break;
12389 case 4:
12390 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12391 break;
12392 case 8:
12393 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12394 break;
12395 default:
12396 AssertFailed();
12397 }
12398
12399 TlbState.freeRegsAndReleaseVars(pReNative);
12400
12401 /*
12402 * TlbDone:
12403 *
12404         * Set the new RSP value (FLAT accesses need to calculate it first) and
12405 * commit the popped register value.
12406 */
12407 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12408 }
12409#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12410
12411 if (idxGReg != X86_GREG_xSP)
12412 {
12413 /* Set the register. */
12414 if (cbMem >= sizeof(uint32_t))
12415 {
12416 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
12417 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
12418 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12419 }
12420 else
12421 {
12422 Assert(cbMem == sizeof(uint16_t));
12423 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
12424 kIemNativeGstRegUse_ForUpdate);
12425 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
12426 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12427 iemNativeRegFreeTmp(pReNative, idxRegDst);
12428 }
12429
12430 /* Complete RSP calculation for FLAT mode. */
12431 if (idxRegEffSp == idxRegRsp)
12432 {
12433 if (cBitsFlat == 64)
12434 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12435 else
12436 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12437 }
12438 }
12439 else
12440 {
12441        /* We're popping RSP, ESP or SP. Only this needs a bit of extra work, of course. */
12442 if (cbMem == sizeof(uint64_t))
12443 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
12444 else if (cbMem == sizeof(uint32_t))
12445 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
12446 else
12447 {
12448 if (idxRegEffSp == idxRegRsp)
12449 {
12450 if (cBitsFlat == 64)
12451 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12452 else
12453 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12454 }
12455 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
12456 }
12457 }
12458 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
12459
12460 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12461 if (idxRegEffSp != idxRegRsp)
12462 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12463 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12464
12465 return off;
12466}
12467
12468
12469
12470/*********************************************************************************************************************************
12471* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
12472*********************************************************************************************************************************/
12473
12474#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12475 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12476 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
12477 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
12478
12479#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12480 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12481 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
12482 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
12483
12484#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12485 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12486 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
12487 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
12488
12489
12490#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12492 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12493 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
12494
12495#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12496 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12497 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12498 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12499
12500#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12501 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12502 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12503 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
12504
12505#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12506 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
12507 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12508 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12509
12510
12511#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12512 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12513 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12514 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
12515
12516#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12517 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12518 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12519 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12520
12521#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12522 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12523 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12524 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
12525
12526#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12527 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
12528 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12529 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12530
12531
12532#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12533 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12534 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12535 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
12536
12537#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12538 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12539 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12540 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12541
12542#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12543 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12544 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12545 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
12546
12547#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12548 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
12549 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12550 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12551
12552
12553#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12554 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12555 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12556 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
12557
12558#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12559 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12560 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12561 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
12562
12563
12564#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12565 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12566 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12567 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
12568
12569#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12570 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12571 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12572 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
12573
12574#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12575 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12576 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12577 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
12578
12579
12580
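/* Flat variants of the IEM_MC_MEM_MAP_XXX blocks above: no segment register
   (UINT8_MAX is passed instead) and the iemNativeHlpMemFlatMapDataXxx helpers
   are used. */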
12581#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12582 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12583 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
12584 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
12585
12586#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12587 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12588 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
12589 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
12590
12591#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12592 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12593 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
12594 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
12595
12596
12597#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12598 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12599 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12600 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
12601
12602#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12603 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12604 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12605 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12606
12607#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12608 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12609 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12610 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
12611
12612#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
12613 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
12614 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12615 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12616
12617
12618#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12619 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12620 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12621 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
12622
12623#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12624 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12625 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12626 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12627
12628#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12629 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12630 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12631 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
12632
12633#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
12634 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
12635 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12636 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12637
12638
12639#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12640 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12641 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12642 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
12643
12644#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12645 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12646 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12647 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12648
12649#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12650 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12651 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12652 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
12653
12654#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
12655 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
12656 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12657 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12658
12659
12660#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
12661 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12662 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12663 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
12664
12665#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
12666 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12667 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12668 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
12669
12670
12671#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12672 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12673 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12674 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
12675
12676#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12677 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12678 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12679 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
12680
12681#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12682 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12683 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12684 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
12685
12686
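/**
 * Common emitter for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX MC
 * blocks: emits a TLB lookup (when enabled) and a helper-call fallback that
 * maps @a cbMem bytes of guest memory at @a idxVarGCPtrMem, returning the
 * host pointer in @a idxVarMem and the unmap info byte in @a idxVarUnmapInfo.
 *
 * @a iSegReg is UINT8_MAX for the flat (no segment) variants.
 */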
12687DECL_INLINE_THROW(uint32_t)
12688iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
12689 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
12690 uintptr_t pfnFunction, uint8_t idxInstr)
12691{
12692 /*
12693 * Assert sanity.
12694 */
12695 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
12696 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
12697 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
12698 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12699
12700 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12701 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
12702 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
12703 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12704
12705 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12706 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
12707 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
12708 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12709
12710 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12711
12712 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12713
12714#ifdef VBOX_STRICT
12715# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
12716 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
12717 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
12718 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
12719 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
12720
12721 if (iSegReg == UINT8_MAX)
12722 {
12723 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12724 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12725 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12726 switch (cbMem)
12727 {
12728 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
12729 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
12730 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
12731 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
12732 case 10:
12733 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
12734 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
12735 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12736 break;
12737 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
12738# if 0
12739 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
12740 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
12741# endif
12742 default: AssertFailed(); break;
12743 }
12744 }
12745 else
12746 {
12747 Assert(iSegReg < 6);
12748 switch (cbMem)
12749 {
12750 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
12751 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
12752 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
12753 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
12754 case 10:
12755 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
12756 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
12757 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12758 break;
12759 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
12760# if 0
12761 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
12762 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
12763# endif
12764 default: AssertFailed(); break;
12765 }
12766 }
12767# undef IEM_MAP_HLP_FN
12768#endif
12769
12770#ifdef VBOX_STRICT
12771 /*
12772 * Check that the fExec flags we've got make sense.
12773 */
12774 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12775#endif
12776
12777 /*
12778 * To keep things simple we have to commit any pending writes first as we
12779 * may end up making calls.
12780 */
12781 off = iemNativeRegFlushPendingWrites(pReNative, off);
12782
12783#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12784 /*
12785 * Move/spill/flush stuff out of call-volatile registers.
12786 * This is the easy way out. We could contain this to the tlb-miss branch
12787 * by saving and restoring active stuff here.
12788 */
12789 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
12790 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
12791#endif
12792
12793 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
12794       while the tlb-miss code path will temporarily put it on the stack.
12795       Set the type to stack here so we don't need to do it twice below. */
12796 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
12797 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
12798 /** @todo use a tmp register from TlbState, since they'll be free after tlb
12799 * lookup is done. */
12800
12801 /*
12802 * Define labels and allocate the result register (trying for the return
12803 * register if we can).
12804 */
12805 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12806 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
12807 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
12808 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
12809 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
12810 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12811 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12812 : UINT32_MAX;
12813//off=iemNativeEmitBrk(pReNative, off, 0);
12814 /*
12815 * Jump to the TLB lookup code.
12816 */
12817 if (!TlbState.fSkip)
12818 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12819
12820 /*
12821 * TlbMiss:
12822 *
12823     * Call helper to do the mapping.
12824 * We flush all guest register shadow copies here.
12825 */
12826 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
12827
12828#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12829 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12830#else
12831 RT_NOREF(idxInstr);
12832#endif
12833
12834#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12835 /* Save variables in volatile registers. */
12836 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
12837 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12838#endif
12839
12840 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
12841 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
12842#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12843 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
12844#else
12845 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12846#endif
12847
12848 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
12849 if (iSegReg != UINT8_MAX)
12850 {
12851 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12852 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
12853 }
12854
12855 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
12856 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
12857 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
12858
12859 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12860 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12861
12862 /* Done setting up parameters, make the call. */
12863 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12864
12865 /*
12866 * Put the output in the right registers.
12867 */
12868 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
12869 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12870 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12871
12872#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12873 /* Restore variables and guest shadow registers to volatile registers. */
12874 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12875 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12876#endif
12877
12878 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
12879 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
12880
12881#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12882 if (!TlbState.fSkip)
12883 {
12884        /* end of TlbMiss - Jump to the done label. */
12885 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12886 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12887
12888 /*
12889 * TlbLookup:
12890 */
12891 off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
12892 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12893# ifdef VBOX_WITH_STATISTICS
12894 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
12895 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
12896# endif
12897
12898 /* [idxVarUnmapInfo] = 0; */
12899 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
12900
12901 /*
12902 * TlbDone:
12903 */
12904 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12905
12906 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12907
12908# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12909 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12910 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12911# endif
12912 }
12913#else
12914 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
12915#endif
12916
12917 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12918 iemNativeVarRegisterRelease(pReNative, idxVarMem);
12919
12920 return off;
12921}
12922
12923
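/* IEM_MC_MEM_COMMIT_AND_UNMAP_RW/WO/RO - commits and unmaps a mapping
   established by one of the IEM_MC_MEM_MAP_XXX blocks above. */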
12924#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
12925 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
12926 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
12927
12928#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
12929 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
12930 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
12931
12932#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
12933 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
12934 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
12935
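/**
 * Emitter for IEM_MC_MEM_COMMIT_AND_UNMAP_RW/WO/RO: tests the bUnmapInfo
 * variable and only calls the commit+unmap helper when it is non-zero, i.e.
 * when the mapping needs special unmapping.
 */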
12936DECL_INLINE_THROW(uint32_t)
12937iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
12938 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
12939{
12940 /*
12941 * Assert sanity.
12942 */
12943 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12944 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
12945 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
12946 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
12947#ifdef VBOX_STRICT
12948 switch (fAccess & IEM_ACCESS_TYPE_MASK)
12949 {
12950 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
12951 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
12952 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
12953 default: AssertFailed();
12954 }
12955#else
12956 RT_NOREF(fAccess);
12957#endif
12958
12959 /*
12960 * To keep things simple we have to commit any pending writes first as we
12961 * may end up making calls (there shouldn't be any at this point, so this
12962 * is just for consistency).
12963 */
12964 /** @todo we could postpone this till we make the call and reload the
12965 * registers after returning from the call. Not sure if that's sensible or
12966 * not, though. */
12967 off = iemNativeRegFlushPendingWrites(pReNative, off);
12968
12969 /*
12970 * Move/spill/flush stuff out of call-volatile registers.
12971 *
12972 * We exclude any register holding the bUnmapInfo variable, as we'll be
12973 * checking it after returning from the call and will free it afterwards.
12974 */
12975 /** @todo save+restore active registers and maybe guest shadows in miss
12976 * scenario. */
12977 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
12978
12979 /*
12980 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
12981 * to call the unmap helper function.
12982 *
12983     * The likelihood of it being zero is higher than for the TLB hit when doing
12984     * the mapping, as a TLB miss for a well aligned and unproblematic memory
12985 * access should also end up with a mapping that won't need special unmapping.
12986 */
12987 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
12988 * should speed up things for the pure interpreter as well when TLBs
12989 * are enabled. */
12990#ifdef RT_ARCH_AMD64
12991 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
12992 {
12993 /* test byte [rbp - xxx], 0ffh */
12994 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
12995 pbCodeBuf[off++] = 0xf6;
12996 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
12997 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
12998 pbCodeBuf[off++] = 0xff;
12999 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13000 }
13001 else
13002#endif
13003 {
13004 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
13005 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
13006 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
13007 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13008 }
13009 uint32_t const offJmpFixup = off;
13010 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
13011
13012 /*
13013 * Call the unmap helper function.
13014 */
13015#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
13016 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13017#else
13018 RT_NOREF(idxInstr);
13019#endif
13020
13021 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
13022 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
13023 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13024
13025 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13026 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13027
13028 /* Done setting up parameters, make the call. */
13029 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13030
13031    /* The bUnmapInfo variable is implicitly freed by these MCs. */
13032 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
13033
13034 /*
13035 * Done, just fixup the jump for the non-call case.
13036 */
13037 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
13038
13039 return off;
13040}
13041
13042
13043
13044/*********************************************************************************************************************************
13045* State and Exceptions *
13046*********************************************************************************************************************************/
13047
13048#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13049#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13050
13051#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13052#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13053#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13054
13055#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13056#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13057#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13058
13059
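/** Common emitter for the FPU/SSE/AVX state actualization MC blocks above;
 * currently little more than a stub (see the todo within). */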
13060DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
13061{
13062 /** @todo this needs a lot more work later. */
13063 RT_NOREF(pReNative, fForChange);
13064 return off;
13065}
13066
13067
13068/*********************************************************************************************************************************
13069* The native code generator functions for each MC block. *
13070*********************************************************************************************************************************/
13071
13072
13073/*
13074 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13075 *
13076 * This should probably live in its own file later, but let's see what the
13077 * compile times turn out to be first.
13078 */
13079#include "IEMNativeFunctions.cpp.h"
13080
13081
13082
13083/*********************************************************************************************************************************
13084* Recompiler Core. *
13085*********************************************************************************************************************************/
13086
13087
13088/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
13089static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
13090{
13091 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
13092 pDis->cbCachedInstr += cbMaxRead;
13093 RT_NOREF(cbMinRead);
13094 return VERR_NO_DATA;
13095}
13096
13097
13098/**
13099 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
13100 * @returns pszBuf.
13101 * @param fFlags The flags.
13102 * @param pszBuf The output buffer.
13103 * @param cbBuf The output buffer size. At least 32 bytes.
13104 */
13105DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
13106{
13107 Assert(cbBuf >= 32);
13108 static RTSTRTUPLE const s_aModes[] =
13109 {
13110 /* [00] = */ { RT_STR_TUPLE("16BIT") },
13111 /* [01] = */ { RT_STR_TUPLE("32BIT") },
13112 /* [02] = */ { RT_STR_TUPLE("!2!") },
13113 /* [03] = */ { RT_STR_TUPLE("!3!") },
13114 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
13115 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
13116 /* [06] = */ { RT_STR_TUPLE("!6!") },
13117 /* [07] = */ { RT_STR_TUPLE("!7!") },
13118 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
13119 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
13120 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
13121 /* [0b] = */ { RT_STR_TUPLE("!b!") },
13122 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
13123 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
13124 /* [0e] = */ { RT_STR_TUPLE("!e!") },
13125 /* [0f] = */ { RT_STR_TUPLE("!f!") },
13126 /* [10] = */ { RT_STR_TUPLE("!10!") },
13127 /* [11] = */ { RT_STR_TUPLE("!11!") },
13128 /* [12] = */ { RT_STR_TUPLE("!12!") },
13129 /* [13] = */ { RT_STR_TUPLE("!13!") },
13130 /* [14] = */ { RT_STR_TUPLE("!14!") },
13131 /* [15] = */ { RT_STR_TUPLE("!15!") },
13132 /* [16] = */ { RT_STR_TUPLE("!16!") },
13133 /* [17] = */ { RT_STR_TUPLE("!17!") },
13134 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
13135 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
13136 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
13137 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
13138 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
13139 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
13140 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
13141 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
13142 };
13143 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
13144 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
13145 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
13146
13147 pszBuf[off++] = ' ';
13148 pszBuf[off++] = 'C';
13149 pszBuf[off++] = 'P';
13150 pszBuf[off++] = 'L';
13151 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
13152 Assert(off < 32);
13153
13154 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
13155
13156 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
13157 {
13158 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
13159 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
13160 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
13161 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
13162 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
13163 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
13164 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
13165 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
13166 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
13167 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
13168 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
13169 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
13170 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
13171 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
13172 };
13173 if (fFlags)
13174 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
13175 if (s_aFlags[i].fFlag & fFlags)
13176 {
13177 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
13178 pszBuf[off++] = ' ';
13179 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
13180 off += s_aFlags[i].cchName;
13181 fFlags &= ~s_aFlags[i].fFlag;
13182 if (!fFlags)
13183 break;
13184 }
13185 pszBuf[off] = '\0';
13186
13187 return pszBuf;
13188}
13189
13190
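/**
 * Disassembles and dumps the native code of a translation block via the given
 * DBGF info helper, annotating it with guest instructions and labels when
 * debug info (IEMNATIVE_WITH_TB_DEBUG_INFO) is available.
 */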
13191DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
13192{
13193 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
13194#if defined(RT_ARCH_AMD64)
13195 static const char * const a_apszMarkers[] =
13196 {
13197 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
13198 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
13199 };
13200#endif
13201
13202 char szDisBuf[512];
13203 DISSTATE Dis;
13204 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
13205 uint32_t const cNative = pTb->Native.cInstructions;
13206 uint32_t offNative = 0;
13207#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13208 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
13209#endif
13210 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13211 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13212 : DISCPUMODE_64BIT;
13213#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13214 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
13215#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13216 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
13217#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13218# error "Port me"
13219#else
13220 csh hDisasm = ~(size_t)0;
13221# if defined(RT_ARCH_AMD64)
13222 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
13223# elif defined(RT_ARCH_ARM64)
13224 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
13225# else
13226# error "Port me"
13227# endif
13228 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
13229#endif
13230
13231 /*
13232 * Print TB info.
13233 */
13234 pHlp->pfnPrintf(pHlp,
13235 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
13236 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
13237 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
13238 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
13239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13240 if (pDbgInfo && pDbgInfo->cEntries > 1)
13241 {
13242 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
13243
13244 /*
13245 * This disassembly is driven by the debug info which follows the native
13246 * code and indicates when it starts with the next guest instructions,
13247 * where labels are and such things.
13248 */
13249 uint32_t idxThreadedCall = 0;
13250 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
13251 uint8_t idxRange = UINT8_MAX;
13252 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
13253 uint32_t offRange = 0;
13254 uint32_t offOpcodes = 0;
13255 uint32_t const cbOpcodes = pTb->cbOpcodes;
13256 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
13257 uint32_t const cDbgEntries = pDbgInfo->cEntries;
13258 uint32_t iDbgEntry = 1;
13259 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
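        /* Entry 0 is always a NativeOffset record (asserted above); each NativeOffset entry
           gives the host instruction offset at which the debug records that follow it apply. */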
13260
13261 while (offNative < cNative)
13262 {
13263 /* If we're at or have passed the point where the next chunk of debug
13264 info starts, process it. */
13265 if (offDbgNativeNext <= offNative)
13266 {
13267 offDbgNativeNext = UINT32_MAX;
13268 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
13269 {
13270 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
13271 {
13272 case kIemTbDbgEntryType_GuestInstruction:
13273 {
13274 /* Did the exec flag change? */
13275 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
13276 {
13277 pHlp->pfnPrintf(pHlp,
13278 " fExec change %#08x -> %#08x %s\n",
13279 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13280 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13281 szDisBuf, sizeof(szDisBuf)));
13282 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
13283 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13284 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13285 : DISCPUMODE_64BIT;
13286 }
13287
13288                                 /* New opcode range? We need to fend off a spurious debug info entry here for cases
13289 where the compilation was aborted before the opcode was recorded and the actual
13290 instruction was translated to a threaded call. This may happen when we run out
13291 of ranges, or when some complicated interrupts/FFs are found to be pending or
13292 similar. So, we just deal with it here rather than in the compiler code as it
13293 is a lot simpler to do here. */
13294 if ( idxRange == UINT8_MAX
13295 || idxRange >= cRanges
13296 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
13297 {
13298 idxRange += 1;
13299 if (idxRange < cRanges)
13300 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
13301 else
13302 continue;
13303 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
13304 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
13305 + (pTb->aRanges[idxRange].idxPhysPage == 0
13306 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13307 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
13308 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13309 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
13310 pTb->aRanges[idxRange].idxPhysPage);
13311 GCPhysPc += offRange;
13312 }
13313
13314 /* Disassemble the instruction. */
13315 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
13316 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
13317 uint32_t cbInstr = 1;
13318 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13319 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
13320 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13321 if (RT_SUCCESS(rc))
13322 {
13323 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13324 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13325 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13326 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13327
13328 static unsigned const s_offMarker = 55;
13329 static char const s_szMarker[] = " ; <--- guest";
13330 if (cch < s_offMarker)
13331 {
13332 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
13333 cch = s_offMarker;
13334 }
13335 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
13336 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
13337
13338 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
13339 }
13340 else
13341 {
13342 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
13343 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
13344 cbInstr = 1;
13345 }
13346 GCPhysPc += cbInstr;
13347 offOpcodes += cbInstr;
13348 offRange += cbInstr;
13349 continue;
13350 }
13351
13352 case kIemTbDbgEntryType_ThreadedCall:
13353 pHlp->pfnPrintf(pHlp,
13354 " Call #%u to %s (%u args) - %s\n",
13355 idxThreadedCall,
13356 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13357 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13358 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
13359 idxThreadedCall++;
13360 continue;
13361
13362 case kIemTbDbgEntryType_GuestRegShadowing:
13363 {
13364 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
13365 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
13366 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
13367 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
13368 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13369 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
13370 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
13371 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
13372 else
13373 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
13374 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
13375 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13376 continue;
13377 }
13378
13379 case kIemTbDbgEntryType_Label:
13380 {
13381 const char *pszName = "what_the_fudge";
13382 const char *pszComment = "";
13383 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
13384 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
13385 {
13386 case kIemNativeLabelType_Return:
13387 pszName = "Return";
13388 break;
13389 case kIemNativeLabelType_ReturnBreak:
13390 pszName = "ReturnBreak";
13391 break;
13392 case kIemNativeLabelType_ReturnWithFlags:
13393 pszName = "ReturnWithFlags";
13394 break;
13395 case kIemNativeLabelType_NonZeroRetOrPassUp:
13396 pszName = "NonZeroRetOrPassUp";
13397 break;
13398 case kIemNativeLabelType_RaiseGp0:
13399 pszName = "RaiseGp0";
13400 break;
13401 case kIemNativeLabelType_ObsoleteTb:
13402 pszName = "ObsoleteTb";
13403 break;
13404 case kIemNativeLabelType_NeedCsLimChecking:
13405 pszName = "NeedCsLimChecking";
13406 break;
13407 case kIemNativeLabelType_CheckBranchMiss:
13408 pszName = "CheckBranchMiss";
13409 break;
13410 case kIemNativeLabelType_If:
13411 pszName = "If";
13412 fNumbered = true;
13413 break;
13414 case kIemNativeLabelType_Else:
13415 pszName = "Else";
13416 fNumbered = true;
13417 pszComment = " ; regs state restored pre-if-block";
13418 break;
13419 case kIemNativeLabelType_Endif:
13420 pszName = "Endif";
13421 fNumbered = true;
13422 break;
13423 case kIemNativeLabelType_CheckIrq:
13424 pszName = "CheckIrq_CheckVM";
13425 fNumbered = true;
13426 break;
13427 case kIemNativeLabelType_TlbLookup:
13428 pszName = "TlbLookup";
13429 fNumbered = true;
13430 break;
13431 case kIemNativeLabelType_TlbMiss:
13432 pszName = "TlbMiss";
13433 fNumbered = true;
13434 break;
13435 case kIemNativeLabelType_TlbDone:
13436 pszName = "TlbDone";
13437 fNumbered = true;
13438 break;
13439 case kIemNativeLabelType_Invalid:
13440 case kIemNativeLabelType_End:
13441 break;
13442 }
13443 if (fNumbered)
13444 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
13445 else
13446 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
13447 continue;
13448 }
13449
13450 case kIemTbDbgEntryType_NativeOffset:
13451 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
13452 Assert(offDbgNativeNext > offNative);
13453 break;
13454
13455 default:
13456 AssertFailed();
13457 }
13458 iDbgEntry++;
13459 break;
13460 }
13461 }
13462
13463 /*
13464 * Disassemble the next native instruction.
13465 */
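            /* Note: IEMNATIVEINSTR is a byte on AMD64 and a 32-bit word on ARM64, so offNative
               and cNative count those units and the cbInstr / sizeof() advance works for both. */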
13466 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13467# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13468 uint32_t cbInstr = sizeof(paNative[0]);
13469 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13470 if (RT_SUCCESS(rc))
13471 {
13472# if defined(RT_ARCH_AMD64)
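                /* The marker is a 7-byte NOP carrying an info dword at byte 3: the low 15 bits
                   give the call number, bit 15 is set for recompiled calls, and the high word
                   holds the threaded function index (see the emit in iemNativeRecompile). */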
13473 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13474 {
13475 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13476 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13477 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13478 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13479 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13480 uInfo & 0x8000 ? "recompiled" : "todo");
13481 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13482 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13483 else
13484 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13485 }
13486 else
13487# endif
13488 {
13489# ifdef RT_ARCH_AMD64
13490 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13491 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13492 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13493 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13494# elif defined(RT_ARCH_ARM64)
13495 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13496 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13497 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13498# else
13499# error "Port me"
13500# endif
13501 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13502 }
13503 }
13504 else
13505 {
13506# if defined(RT_ARCH_AMD64)
13507 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13508 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13509# elif defined(RT_ARCH_ARM64)
13510 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13511# else
13512# error "Port me"
13513# endif
13514 cbInstr = sizeof(paNative[0]);
13515 }
13516 offNative += cbInstr / sizeof(paNative[0]);
13517
13518# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13519 cs_insn *pInstr;
13520 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13521 (uintptr_t)pNativeCur, 1, &pInstr);
13522 if (cInstrs > 0)
13523 {
13524 Assert(cInstrs == 1);
13525# if defined(RT_ARCH_AMD64)
13526 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13527 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13528# else
13529 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13530 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13531# endif
13532 offNative += pInstr->size / sizeof(*pNativeCur);
13533 cs_free(pInstr, cInstrs);
13534 }
13535 else
13536 {
13537# if defined(RT_ARCH_AMD64)
13538 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13539                             pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13540# else
13541 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13542# endif
13543 offNative++;
13544 }
13545# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13546 }
13547 }
13548 else
13549#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
13550 {
13551 /*
13552 * No debug info, just disassemble the x86 code and then the native code.
13553 *
13554 * First the guest code:
13555 */
13556 for (unsigned i = 0; i < pTb->cRanges; i++)
13557 {
13558 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
13559 + (pTb->aRanges[i].idxPhysPage == 0
13560 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13561 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
13562 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13563 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
13564 unsigned off = pTb->aRanges[i].offOpcodes;
13565 /** @todo this ain't working when crossing pages! */
13566 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
13567 while (off < cbOpcodes)
13568 {
13569 uint32_t cbInstr = 1;
13570 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13571 &pTb->pabOpcodes[off], cbOpcodes - off,
13572 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13573 if (RT_SUCCESS(rc))
13574 {
13575 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13576 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13577 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13578 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13579 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
13580 GCPhysPc += cbInstr;
13581 off += cbInstr;
13582 }
13583 else
13584 {
13585 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
13586 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
13587 break;
13588 }
13589 }
13590 }
13591
13592 /*
13593 * Then the native code:
13594 */
13595 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
13596 while (offNative < cNative)
13597 {
13598 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13599# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13600 uint32_t cbInstr = sizeof(paNative[0]);
13601 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13602 if (RT_SUCCESS(rc))
13603 {
13604# if defined(RT_ARCH_AMD64)
13605 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13606 {
13607 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13608 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13609 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13610 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13611 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13612 uInfo & 0x8000 ? "recompiled" : "todo");
13613 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13614 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13615 else
13616 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13617 }
13618 else
13619# endif
13620 {
13621# ifdef RT_ARCH_AMD64
13622 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13623 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13624 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13625 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13626# elif defined(RT_ARCH_ARM64)
13627 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13628 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13629 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13630# else
13631# error "Port me"
13632# endif
13633 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13634 }
13635 }
13636 else
13637 {
13638# if defined(RT_ARCH_AMD64)
13639 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13640 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13641# else
13642 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13643# endif
13644 cbInstr = sizeof(paNative[0]);
13645 }
13646 offNative += cbInstr / sizeof(paNative[0]);
13647
13648# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13649 cs_insn *pInstr;
13650 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13651 (uintptr_t)pNativeCur, 1, &pInstr);
13652 if (cInstrs > 0)
13653 {
13654 Assert(cInstrs == 1);
13655# if defined(RT_ARCH_AMD64)
13656 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13657 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13658# else
13659 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13660 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13661# endif
13662 offNative += pInstr->size / sizeof(*pNativeCur);
13663 cs_free(pInstr, cInstrs);
13664 }
13665 else
13666 {
13667# if defined(RT_ARCH_AMD64)
13668 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13669                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13670# else
13671 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13672# endif
13673 offNative++;
13674 }
13675# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13676 }
13677 }
13678
13679#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13680 /* Cleanup. */
13681 cs_close(&hDisasm);
13682#endif
13683}
13684
13685
13686/**
13687 * Recompiles the given threaded TB into a native one.
13688 *
13689 * In case of failure the translation block will be returned as-is.
13690 *
13691 * @returns pTb.
13692 * @param pVCpu The cross context virtual CPU structure of the calling
13693 * thread.
13694  * @param   pTb     The threaded translation block to recompile to native.
13695 */
13696DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
13697{
13698 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
13699
13700 /*
13701      * The first time thru, we allocate the recompiler state; the other times
13702 * we just need to reset it before using it again.
13703 */
13704 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
13705 if (RT_LIKELY(pReNative))
13706 iemNativeReInit(pReNative, pTb);
13707 else
13708 {
13709 pReNative = iemNativeInit(pVCpu, pTb);
13710 AssertReturn(pReNative, pTb);
13711 }
13712
13713 /*
13714 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
13715 * for aborting if an error happens.
13716 */
13717 uint32_t cCallsLeft = pTb->Thrd.cCalls;
13718#ifdef LOG_ENABLED
13719 uint32_t const cCallsOrg = cCallsLeft;
13720#endif
13721 uint32_t off = 0;
13722 int rc = VINF_SUCCESS;
13723 IEMNATIVE_TRY_SETJMP(pReNative, rc)
13724 {
13725 /*
13726 * Emit prolog code (fixed).
13727 */
13728 off = iemNativeEmitProlog(pReNative, off);
13729
13730 /*
13731 * Convert the calls to native code.
13732 */
13733#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13734 int32_t iGstInstr = -1;
13735#endif
13736#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
13737 uint32_t cThreadedCalls = 0;
13738 uint32_t cRecompiledCalls = 0;
13739#endif
13740 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
13741 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
13742 while (cCallsLeft-- > 0)
13743 {
13744 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
13745
13746 /*
13747 * Debug info and assembly markup.
13748 */
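            /* BltIn_CheckMode carries the updated execution mode in auParams[0]; mirror it
               into pReNative->fExec so the debug info and the recompile functions see it. */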
13749 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
13750 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
13751#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13752 iemNativeDbgInfoAddNativeOffset(pReNative, off);
13753 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
13754 {
13755 if (iGstInstr < (int32_t)pTb->cInstructions)
13756 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
13757 else
13758 Assert(iGstInstr == pTb->cInstructions);
13759 iGstInstr = pCallEntry->idxInstr;
13760 }
13761 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
13762#endif
13763#if defined(VBOX_STRICT)
13764 off = iemNativeEmitMarker(pReNative, off,
13765 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
13766 pCallEntry->enmFunction));
13767#endif
13768#if defined(VBOX_STRICT)
13769 iemNativeRegAssertSanity(pReNative);
13770#endif
13771
13772 /*
13773 * Actual work.
13774 */
13775 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
13776 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
13777 if (pfnRecom) /** @todo stats on this. */
13778 {
13779 off = pfnRecom(pReNative, off, pCallEntry);
13780 STAM_REL_STATS({cRecompiledCalls++;});
13781 }
13782 else
13783 {
13784 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
13785 STAM_REL_STATS({cThreadedCalls++;});
13786 }
13787 Assert(off <= pReNative->cInstrBufAlloc);
13788 Assert(pReNative->cCondDepth == 0);
13789
13790 /*
13791 * Advance.
13792 */
13793 pCallEntry++;
13794 }
13795
13796 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
13797 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
13798 if (!cThreadedCalls)
13799 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
13800
13801 /*
13802 * Emit the epilog code.
13803 */
13804 uint32_t idxReturnLabel;
13805 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
13806
13807 /*
13808 * Generate special jump labels.
13809 */
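        /* Only the label types actually requested during recompilation (tracked in
           bmLabelTypes) get their shared tail code emitted, keeping the TB small. */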
13810 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
13811 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
13812 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
13813 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
13814 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
13815 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
13816 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
13817 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
13818 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
13819 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
13820 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
13821 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
13822 }
13823 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
13824 {
13825 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
13826 return pTb;
13827 }
13828 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
13829 Assert(off <= pReNative->cInstrBufAlloc);
13830
13831 /*
13832      * Make sure all labels have been defined.
13833 */
13834 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
13835#ifdef VBOX_STRICT
13836 uint32_t const cLabels = pReNative->cLabels;
13837 for (uint32_t i = 0; i < cLabels; i++)
13838 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
13839#endif
13840
13841 /*
13842 * Allocate executable memory, copy over the code we've generated.
13843 */
13844 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
13845 if (pTbAllocator->pDelayedFreeHead)
13846 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
13847
13848 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
13849 AssertReturn(paFinalInstrBuf, pTb);
13850 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
13851
13852 /*
13853 * Apply fixups.
13854 */
13855 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
13856 uint32_t const cFixups = pReNative->cFixups;
13857 for (uint32_t i = 0; i < cFixups; i++)
13858 {
13859 Assert(paFixups[i].off < off);
13860 Assert(paFixups[i].idxLabel < cLabels);
13861 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
13862 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
13863 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
13864 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
13865 switch (paFixups[i].enmType)
13866 {
13867#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
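            /* x86/AMD64: patch a 32-bit displacement; offsets are byte offsets into the
               instruction buffer, and offAddend lets the requester bias the target, e.g. to
               account for rel32 being relative to the end of the instruction. */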
13868 case kIemNativeFixupType_Rel32:
13869 Assert(paFixups[i].off + 4 <= off);
13870 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13871 continue;
13872
13873#elif defined(RT_ARCH_ARM64)
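            /* ARM64: patch the signed immediate field of A64 branches - imm26 at bit 0 (B/BL),
               imm19 at bits 5..23 (B.cond, CBZ/CBNZ), imm14 at bits 5..18 (TBZ/TBNZ).
               Displacements are counted in 32-bit instruction units. */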
13874 case kIemNativeFixupType_RelImm26At0:
13875 {
13876 Assert(paFixups[i].off < off);
13877 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13878 Assert(offDisp >= -262144 && offDisp < 262144);
13879 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
13880 continue;
13881 }
13882
13883 case kIemNativeFixupType_RelImm19At5:
13884 {
13885 Assert(paFixups[i].off < off);
13886 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13887 Assert(offDisp >= -262144 && offDisp < 262144);
13888 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
13889 continue;
13890 }
13891
13892 case kIemNativeFixupType_RelImm14At5:
13893 {
13894 Assert(paFixups[i].off < off);
13895 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13896 Assert(offDisp >= -8192 && offDisp < 8192);
13897 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
13898 continue;
13899 }
13900
13901#endif
13902 case kIemNativeFixupType_Invalid:
13903 case kIemNativeFixupType_End:
13904 break;
13905 }
13906 AssertFailed();
13907 }
13908
13909 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
13910 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
13911
13912 /*
13913 * Convert the translation block.
13914 */
13915 RTMemFree(pTb->Thrd.paCalls);
13916 pTb->Native.paInstructions = paFinalInstrBuf;
13917 pTb->Native.cInstructions = off;
13918 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
13919#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13920     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
13921 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
13922#endif
13923
13924 Assert(pTbAllocator->cThreadedTbs > 0);
13925 pTbAllocator->cThreadedTbs -= 1;
13926 pTbAllocator->cNativeTbs += 1;
13927 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
13928
13929#ifdef LOG_ENABLED
13930 /*
13931 * Disassemble to the log if enabled.
13932 */
13933 if (LogIs3Enabled())
13934 {
13935 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
13936 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
13937# ifdef DEBUG_bird
13938 RTLogFlush(NULL);
13939# endif
13940 }
13941#endif
13942 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
13943
13944 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
13945 return pTb;
13946}
13947