VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 102471

Last change on this file since 102471 was 102471, checked in by vboxsync, 17 months ago

VMM/IEM: IEM_MC_ASSIGN_TO_SMALLER (lea); fixed a few bugs wrt guest register shadowing. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 482.3 KB
Line 
1/* $Id: IEMAllN8veRecompiler.cpp 102471 2023-12-05 12:46:45Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incomaptible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs here.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef VBOX_STRICT
132static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
133 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
134static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
135#endif
136#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
138static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
139#endif
140DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
142 IEMNATIVEGSTREG enmGstReg, uint32_t off);
143
144
145/*********************************************************************************************************************************
146* Executable Memory Allocator *
147*********************************************************************************************************************************/
148/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
149 * Use an alternative chunk sub-allocator that does store internal data
150 * in the chunk.
151 *
152 * Using the RTHeapSimple is not practial on newer darwin systems where
153 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
154 * memory. We would have to change the protection of the whole chunk for
155 * every call to RTHeapSimple, which would be rather expensive.
156 *
157 * This alternative implemenation let restrict page protection modifications
158 * to the pages backing the executable memory we just allocated.
159 */
160#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
161/** The chunk sub-allocation unit size in bytes. */
162#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
163/** The chunk sub-allocation unit size as a shift factor. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
165
166#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
167# ifdef IEMNATIVE_USE_GDB_JIT
168# define IEMNATIVE_USE_GDB_JIT_ET_DYN
169
170/** GDB JIT: Code entry. */
171typedef struct GDBJITCODEENTRY
172{
173 struct GDBJITCODEENTRY *pNext;
174 struct GDBJITCODEENTRY *pPrev;
175 uint8_t *pbSymFile;
176 uint64_t cbSymFile;
177} GDBJITCODEENTRY;
178
179/** GDB JIT: Actions. */
180typedef enum GDBJITACTIONS : uint32_t
181{
182 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
183} GDBJITACTIONS;
184
185/** GDB JIT: Descriptor. */
186typedef struct GDBJITDESCRIPTOR
187{
188 uint32_t uVersion;
189 GDBJITACTIONS enmAction;
190 GDBJITCODEENTRY *pRelevant;
191 GDBJITCODEENTRY *pHead;
192 /** Our addition: */
193 GDBJITCODEENTRY *pTail;
194} GDBJITDESCRIPTOR;
195
196/** GDB JIT: Our simple symbol file data. */
197typedef struct GDBJITSYMFILE
198{
199 Elf64_Ehdr EHdr;
200# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Shdr aShdrs[5];
202# else
203 Elf64_Shdr aShdrs[7];
204 Elf64_Phdr aPhdrs[2];
205# endif
206 /** The dwarf ehframe data for the chunk. */
207 uint8_t abEhFrame[512];
208 char szzStrTab[128];
209 Elf64_Sym aSymbols[3];
210# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
211 Elf64_Sym aDynSyms[2];
212 Elf64_Dyn aDyn[6];
213# endif
214} GDBJITSYMFILE;
215
216extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
217extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
218
219/** Init once for g_IemNativeGdbJitLock. */
220static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
221/** Init once for the critical section. */
222static RTCRITSECT g_IemNativeGdbJitLock;
223
224/** GDB reads the info here. */
225GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
226
227/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
228DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
229{
230 ASMNopPause();
231}
232
233/** @callback_method_impl{FNRTONCE} */
234static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
235{
236 RT_NOREF(pvUser);
237 return RTCritSectInit(&g_IemNativeGdbJitLock);
238}
239
240
241# endif /* IEMNATIVE_USE_GDB_JIT */
242
243/**
244 * Per-chunk unwind info for non-windows hosts.
245 */
246typedef struct IEMEXECMEMCHUNKEHFRAME
247{
248# ifdef IEMNATIVE_USE_LIBUNWIND
249 /** The offset of the FDA into abEhFrame. */
250 uintptr_t offFda;
251# else
252 /** 'struct object' storage area. */
253 uint8_t abObject[1024];
254# endif
255# ifdef IEMNATIVE_USE_GDB_JIT
256# if 0
257 /** The GDB JIT 'symbol file' data. */
258 GDBJITSYMFILE GdbJitSymFile;
259# endif
260 /** The GDB JIT list entry. */
261 GDBJITCODEENTRY GdbJitEntry;
262# endif
263 /** The dwarf ehframe data for the chunk. */
264 uint8_t abEhFrame[512];
265} IEMEXECMEMCHUNKEHFRAME;
266/** Pointer to per-chunk info info for non-windows hosts. */
267typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
268#endif
269
270
271/**
272 * An chunk of executable memory.
273 */
274typedef struct IEMEXECMEMCHUNK
275{
276#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
277 /** Number of free items in this chunk. */
278 uint32_t cFreeUnits;
279 /** Hint were to start searching for free space in the allocation bitmap. */
280 uint32_t idxFreeHint;
281#else
282 /** The heap handle. */
283 RTHEAPSIMPLE hHeap;
284#endif
285 /** Pointer to the chunk. */
286 void *pvChunk;
287#ifdef IN_RING3
288 /**
289 * Pointer to the unwind information.
290 *
291 * This is used during C++ throw and longjmp (windows and probably most other
292 * platforms). Some debuggers (windbg) makes use of it as well.
293 *
294 * Windows: This is allocated from hHeap on windows because (at least for
295 * AMD64) the UNWIND_INFO structure address in the
296 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
297 *
298 * Others: Allocated from the regular heap to avoid unnecessary executable data
299 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
300 void *pvUnwindInfo;
301#elif defined(IN_RING0)
302 /** Allocation handle. */
303 RTR0MEMOBJ hMemObj;
304#endif
305} IEMEXECMEMCHUNK;
306/** Pointer to a memory chunk. */
307typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
308
309
310/**
311 * Executable memory allocator for the native recompiler.
312 */
313typedef struct IEMEXECMEMALLOCATOR
314{
315 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
316 uint32_t uMagic;
317
318 /** The chunk size. */
319 uint32_t cbChunk;
320 /** The maximum number of chunks. */
321 uint32_t cMaxChunks;
322 /** The current number of chunks. */
323 uint32_t cChunks;
324 /** Hint where to start looking for available memory. */
325 uint32_t idxChunkHint;
326 /** Statistics: Current number of allocations. */
327 uint32_t cAllocations;
328
329 /** The total amount of memory available. */
330 uint64_t cbTotal;
331 /** Total amount of free memory. */
332 uint64_t cbFree;
333 /** Total amount of memory allocated. */
334 uint64_t cbAllocated;
335
336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
337 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
338 *
339 * Since the chunk size is a power of two and the minimum chunk size is a lot
340 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
341 * require a whole number of uint64_t elements in the allocation bitmap. So,
342 * for sake of simplicity, they are allocated as one continous chunk for
343 * simplicity/laziness. */
344 uint64_t *pbmAlloc;
345 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
346 uint32_t cUnitsPerChunk;
347 /** Number of bitmap elements per chunk (for quickly locating the bitmap
348 * portion corresponding to an chunk). */
349 uint32_t cBitmapElementsPerChunk;
350#else
351 /** @name Tweaks to get 64 byte aligned allocats w/o unnecessary fragmentation.
352 * @{ */
353 /** The size of the heap internal block header. This is used to adjust the
354 * request memory size to make sure there is exacly enough room for a header at
355 * the end of the blocks we allocate before the next 64 byte alignment line. */
356 uint32_t cbHeapBlockHdr;
357 /** The size of initial heap allocation required make sure the first
358 * allocation is correctly aligned. */
359 uint32_t cbHeapAlignTweak;
360 /** The alignment tweak allocation address. */
361 void *pvAlignTweak;
362 /** @} */
363#endif
364
365#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
366 /** Pointer to the array of unwind info running parallel to aChunks (same
367 * allocation as this structure, located after the bitmaps).
368 * (For Windows, the structures must reside in 32-bit RVA distance to the
369 * actual chunk, so they are allocated off the chunk.) */
370 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
371#endif
372
373 /** The allocation chunks. */
374 RT_FLEXIBLE_ARRAY_EXTENSION
375 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
376} IEMEXECMEMALLOCATOR;
377/** Pointer to an executable memory allocator. */
378typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
379
380/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
381#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
382
383
384static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
385
386
387/**
388 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
389 * the heap statistics.
390 */
391static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
392 uint32_t cbReq, uint32_t idxChunk)
393{
394 pExecMemAllocator->cAllocations += 1;
395 pExecMemAllocator->cbAllocated += cbReq;
396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
397 pExecMemAllocator->cbFree -= cbReq;
398#else
399 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
400#endif
401 pExecMemAllocator->idxChunkHint = idxChunk;
402
403#ifdef RT_OS_DARWIN
404 /*
405 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
406 * on darwin. So, we mark the pages returned as read+write after alloc and
407 * expect the caller to call iemExecMemAllocatorReadyForUse when done
408 * writing to the allocation.
409 *
410 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
411 * for details.
412 */
413 /** @todo detect if this is necessary... it wasn't required on 10.15 or
414 * whatever older version it was. */
415 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
416 AssertRC(rc);
417#endif
418
419 return pvRet;
420}
421
422
423#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
424static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
425 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
426{
427 /*
428 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
429 */
430 Assert(!(cToScan & 63));
431 Assert(!(idxFirst & 63));
432 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
433 pbmAlloc += idxFirst / 64;
434
435 /*
436 * Scan the bitmap for cReqUnits of consequtive clear bits
437 */
438 /** @todo This can probably be done more efficiently for non-x86 systems. */
439 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
440 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
441 {
442 uint32_t idxAddBit = 1;
443 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
444 idxAddBit++;
445 if (idxAddBit >= cReqUnits)
446 {
447 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
448
449 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
450 pChunk->cFreeUnits -= cReqUnits;
451 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
452
453 void * const pvRet = (uint8_t *)pChunk->pvChunk
454 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
455
456 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
457 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
458 }
459
460 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
461 }
462 return NULL;
463}
464#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
465
466
467static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
468{
469#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
470 /*
471 * Figure out how much to allocate.
472 */
473 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
474 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
475 {
476 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
477 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
478 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
479 {
480 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
481 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
482 if (pvRet)
483 return pvRet;
484 }
485 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
486 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
487 cReqUnits, idxChunk);
488 }
489#else
490 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
491 if (pvRet)
492 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
493#endif
494 return NULL;
495
496}
497
498
499/**
500 * Allocates @a cbReq bytes of executable memory.
501 *
502 * @returns Pointer to the memory, NULL if out of memory or other problem
503 * encountered.
504 * @param pVCpu The cross context virtual CPU structure of the calling
505 * thread.
506 * @param cbReq How many bytes are required.
507 */
508static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
509{
510 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
511 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
512 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
513
514 /*
515 * Adjust the request size so it'll fit the allocator alignment/whatnot.
516 *
517 * For the RTHeapSimple allocator this means to follow the logic described
518 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
519 * existing chunks if we think we've got sufficient free memory around.
520 *
521 * While for the alternative one we just align it up to a whole unit size.
522 */
523#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
524 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
525#else
526 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
527#endif
528 if (cbReq <= pExecMemAllocator->cbFree)
529 {
530 uint32_t const cChunks = pExecMemAllocator->cChunks;
531 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
532 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
533 {
534 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
535 if (pvRet)
536 return pvRet;
537 }
538 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
539 {
540 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
541 if (pvRet)
542 return pvRet;
543 }
544 }
545
546 /*
547 * Can we grow it with another chunk?
548 */
549 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
550 {
551 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
552 AssertLogRelRCReturn(rc, NULL);
553
554 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
555 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
556 if (pvRet)
557 return pvRet;
558 AssertFailed();
559 }
560
561 /* What now? Prune native translation blocks from the cache? */
562 AssertFailed();
563 return NULL;
564}
565
566
567/** This is a hook that we may need later for changing memory protection back
568 * to readonly+exec */
569static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
570{
571#ifdef RT_OS_DARWIN
572 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
573 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
574 AssertRC(rc); RT_NOREF(pVCpu);
575
576 /*
577 * Flush the instruction cache:
578 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
579 */
580 /* sys_dcache_flush(pv, cb); - not necessary */
581 sys_icache_invalidate(pv, cb);
582#else
583 RT_NOREF(pVCpu, pv, cb);
584#endif
585}
586
587
588/**
589 * Frees executable memory.
590 */
591void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
592{
593 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
594 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
595 Assert(pv);
596#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
597 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
598#else
599 Assert(!((uintptr_t)pv & 63));
600#endif
601
602 /* Align the size as we did when allocating the block. */
603#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
604 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
605#else
606 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
607#endif
608
609 /* Free it / assert sanity. */
610#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
611 uint32_t const cChunks = pExecMemAllocator->cChunks;
612 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
613 bool fFound = false;
614 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
615 {
616 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
617 fFound = offChunk < cbChunk;
618 if (fFound)
619 {
620#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
621 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
622 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
623
624 /* Check that it's valid and free it. */
625 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
626 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
627 for (uint32_t i = 1; i < cReqUnits; i++)
628 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
629 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
630
631 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
632 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
633
634 /* Update the stats. */
635 pExecMemAllocator->cbAllocated -= cb;
636 pExecMemAllocator->cbFree += cb;
637 pExecMemAllocator->cAllocations -= 1;
638 return;
639#else
640 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
641 break;
642#endif
643 }
644 }
645# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
646 AssertFailed();
647# else
648 Assert(fFound);
649# endif
650#endif
651
652#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
653 /* Update stats while cb is freshly calculated.*/
654 pExecMemAllocator->cbAllocated -= cb;
655 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
656 pExecMemAllocator->cAllocations -= 1;
657
658 /* Free it. */
659 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
660#endif
661}
662
663
664
665#ifdef IN_RING3
666# ifdef RT_OS_WINDOWS
667
668/**
669 * Initializes the unwind info structures for windows hosts.
670 */
671static int
672iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
673 void *pvChunk, uint32_t idxChunk)
674{
675 RT_NOREF(pVCpu);
676
677 /*
678 * The AMD64 unwind opcodes.
679 *
680 * This is a program that starts with RSP after a RET instruction that
681 * ends up in recompiled code, and the operations we describe here will
682 * restore all non-volatile registers and bring RSP back to where our
683 * RET address is. This means it's reverse order from what happens in
684 * the prologue.
685 *
686 * Note! Using a frame register approach here both because we have one
687 * and but mainly because the UWOP_ALLOC_LARGE argument values
688 * would be a pain to write initializers for. On the positive
689 * side, we're impervious to changes in the the stack variable
690 * area can can deal with dynamic stack allocations if necessary.
691 */
692 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
693 {
694 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
695 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
696 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
697 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
698 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
699 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
700 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
701 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
702 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
703 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
704 };
705 union
706 {
707 IMAGE_UNWIND_INFO Info;
708 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
709 } s_UnwindInfo =
710 {
711 {
712 /* .Version = */ 1,
713 /* .Flags = */ 0,
714 /* .SizeOfProlog = */ 16, /* whatever */
715 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
716 /* .FrameRegister = */ X86_GREG_xBP,
717 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
718 }
719 };
720 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
721 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
722
723 /*
724 * Calc how much space we need and allocate it off the exec heap.
725 */
726 unsigned const cFunctionEntries = 1;
727 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
728 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
729# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
730 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
731 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
732 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
733# else
734 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
735 - pExecMemAllocator->cbHeapBlockHdr;
736 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
737 32 /*cbAlignment*/);
738# endif
739 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
740 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
741
742 /*
743 * Initialize the structures.
744 */
745 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
746
747 paFunctions[0].BeginAddress = 0;
748 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
749 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
750
751 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
752 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
753
754 /*
755 * Register it.
756 */
757 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
758 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
759
760 return VINF_SUCCESS;
761}
762
763
764# else /* !RT_OS_WINDOWS */
765
766/**
767 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
768 */
769DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
770{
771 if (iValue >= 64)
772 {
773 Assert(iValue < 0x2000);
774 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
775 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
776 }
777 else if (iValue >= 0)
778 *Ptr.pb++ = (uint8_t)iValue;
779 else if (iValue > -64)
780 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
781 else
782 {
783 Assert(iValue > -0x2000);
784 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
785 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
786 }
787 return Ptr;
788}
789
790
791/**
792 * Emits an ULEB128 encoded value (up to 64-bit wide).
793 */
794DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
795{
796 while (uValue >= 0x80)
797 {
798 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
799 uValue >>= 7;
800 }
801 *Ptr.pb++ = (uint8_t)uValue;
802 return Ptr;
803}
804
805
806/**
807 * Emits a CFA rule as register @a uReg + offset @a off.
808 */
809DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
810{
811 *Ptr.pb++ = DW_CFA_def_cfa;
812 Ptr = iemDwarfPutUleb128(Ptr, uReg);
813 Ptr = iemDwarfPutUleb128(Ptr, off);
814 return Ptr;
815}
816
817
818/**
819 * Emits a register (@a uReg) save location:
820 * CFA + @a off * data_alignment_factor
821 */
822DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
823{
824 if (uReg < 0x40)
825 *Ptr.pb++ = DW_CFA_offset | uReg;
826 else
827 {
828 *Ptr.pb++ = DW_CFA_offset_extended;
829 Ptr = iemDwarfPutUleb128(Ptr, uReg);
830 }
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836# if 0 /* unused */
837/**
838 * Emits a register (@a uReg) save location, using signed offset:
839 * CFA + @a offSigned * data_alignment_factor
840 */
841DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
842{
843 *Ptr.pb++ = DW_CFA_offset_extended_sf;
844 Ptr = iemDwarfPutUleb128(Ptr, uReg);
845 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
846 return Ptr;
847}
848# endif
849
850
851/**
852 * Initializes the unwind info section for non-windows hosts.
853 */
854static int
855iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
856 void *pvChunk, uint32_t idxChunk)
857{
858 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
859 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
860
861 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
862
863 /*
864 * Generate the CIE first.
865 */
866# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
867 uint8_t const iDwarfVer = 3;
868# else
869 uint8_t const iDwarfVer = 4;
870# endif
871 RTPTRUNION const PtrCie = Ptr;
872 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
873 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
874 *Ptr.pb++ = iDwarfVer; /* DwARF version */
875 *Ptr.pb++ = 0; /* Augmentation. */
876 if (iDwarfVer >= 4)
877 {
878 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
879 *Ptr.pb++ = 0; /* Segment selector size. */
880 }
881# ifdef RT_ARCH_AMD64
882 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
883# else
884 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
885# endif
886 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
887# ifdef RT_ARCH_AMD64
888 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
889# elif defined(RT_ARCH_ARM64)
890 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
891# else
892# error "port me"
893# endif
894 /* Initial instructions: */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
897 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
898 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
899 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
904# elif defined(RT_ARCH_ARM64)
905# if 1
906 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
907# else
908 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
909# endif
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
922 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
923 /** @todo we we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
924# else
925# error "port me"
926# endif
927 while ((Ptr.u - PtrCie.u) & 3)
928 *Ptr.pb++ = DW_CFA_nop;
929 /* Finalize the CIE size. */
930 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
931
932 /*
933 * Generate an FDE for the whole chunk area.
934 */
935# ifdef IEMNATIVE_USE_LIBUNWIND
936 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
937# endif
938 RTPTRUNION const PtrFde = Ptr;
939 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
940 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
941 Ptr.pu32++;
942 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
943 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this PDE. */
944# if 0 /* not requried for recent libunwind.dylib nor recent libgcc/glib. */
945 *Ptr.pb++ = DW_CFA_nop;
946# endif
947 while ((Ptr.u - PtrFde.u) & 3)
948 *Ptr.pb++ = DW_CFA_nop;
949 /* Finalize the FDE size. */
950 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
951
952 /* Terminator entry. */
953 *Ptr.pu32++ = 0;
954 *Ptr.pu32++ = 0; /* just to be sure... */
955 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
956
957 /*
958 * Register it.
959 */
960# ifdef IEMNATIVE_USE_LIBUNWIND
961 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
962# else
963 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
964 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
965# endif
966
967# ifdef IEMNATIVE_USE_GDB_JIT
968 /*
969 * Now for telling GDB about this (experimental).
970 *
971 * This seems to work best with ET_DYN.
972 */
973 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
974# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
975 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
976 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
977# else
978 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
979 - pExecMemAllocator->cbHeapBlockHdr;
980 GDBJITSYMFILE * const pSymFile = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
981# endif
982 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
983 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
984
985 RT_ZERO(*pSymFile);
986
987 /*
988 * The ELF header:
989 */
990 pSymFile->EHdr.e_ident[0] = ELFMAG0;
991 pSymFile->EHdr.e_ident[1] = ELFMAG1;
992 pSymFile->EHdr.e_ident[2] = ELFMAG2;
993 pSymFile->EHdr.e_ident[3] = ELFMAG3;
994 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
995 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
996 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
997 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
998# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
999 pSymFile->EHdr.e_type = ET_DYN;
1000# else
1001 pSymFile->EHdr.e_type = ET_REL;
1002# endif
1003# ifdef RT_ARCH_AMD64
1004 pSymFile->EHdr.e_machine = EM_AMD64;
1005# elif defined(RT_ARCH_ARM64)
1006 pSymFile->EHdr.e_machine = EM_AARCH64;
1007# else
1008# error "port me"
1009# endif
1010 pSymFile->EHdr.e_version = 1; /*?*/
1011 pSymFile->EHdr.e_entry = 0;
1012# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1013 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1014# else
1015 pSymFile->EHdr.e_phoff = 0;
1016# endif
1017 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1018 pSymFile->EHdr.e_flags = 0;
1019 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1022 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1023# else
1024 pSymFile->EHdr.e_phentsize = 0;
1025 pSymFile->EHdr.e_phnum = 0;
1026# endif
1027 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1028 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1029 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1030
1031 uint32_t offStrTab = 0;
1032#define APPEND_STR(a_szStr) do { \
1033 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1034 offStrTab += sizeof(a_szStr); \
1035 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1036 } while (0)
1037#define APPEND_STR_FMT(a_szStr, ...) do { \
1038 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1039 offStrTab++; \
1040 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1041 } while (0)
1042
1043 /*
1044 * Section headers.
1045 */
1046 /* Section header #0: NULL */
1047 unsigned i = 0;
1048 APPEND_STR("");
1049 RT_ZERO(pSymFile->aShdrs[i]);
1050 i++;
1051
1052 /* Section header: .eh_frame */
1053 pSymFile->aShdrs[i].sh_name = offStrTab;
1054 APPEND_STR(".eh_frame");
1055 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1056 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1057# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1058 pSymFile->aShdrs[i].sh_offset
1059 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1060# else
1061 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1062 pSymFile->aShdrs[i].sh_offset = 0;
1063# endif
1064
1065 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1066 pSymFile->aShdrs[i].sh_link = 0;
1067 pSymFile->aShdrs[i].sh_info = 0;
1068 pSymFile->aShdrs[i].sh_addralign = 1;
1069 pSymFile->aShdrs[i].sh_entsize = 0;
1070 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1071 i++;
1072
1073 /* Section header: .shstrtab */
1074 unsigned const iShStrTab = i;
1075 pSymFile->EHdr.e_shstrndx = iShStrTab;
1076 pSymFile->aShdrs[i].sh_name = offStrTab;
1077 APPEND_STR(".shstrtab");
1078 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1079 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1080# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1081 pSymFile->aShdrs[i].sh_offset
1082 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1083# else
1084 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1085 pSymFile->aShdrs[i].sh_offset = 0;
1086# endif
1087 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1088 pSymFile->aShdrs[i].sh_link = 0;
1089 pSymFile->aShdrs[i].sh_info = 0;
1090 pSymFile->aShdrs[i].sh_addralign = 1;
1091 pSymFile->aShdrs[i].sh_entsize = 0;
1092 i++;
1093
1094 /* Section header: .symbols */
1095 pSymFile->aShdrs[i].sh_name = offStrTab;
1096 APPEND_STR(".symtab");
1097 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1098 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1102 pSymFile->aShdrs[i].sh_link = iShStrTab;
1103 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1104 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1105 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1106 i++;
1107
1108# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1109 /* Section header: .symbols */
1110 pSymFile->aShdrs[i].sh_name = offStrTab;
1111 APPEND_STR(".dynsym");
1112 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1113 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1114 pSymFile->aShdrs[i].sh_offset
1115 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1116 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1117 pSymFile->aShdrs[i].sh_link = iShStrTab;
1118 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1119 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1120 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1121 i++;
1122# endif
1123
1124# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1125 /* Section header: .dynamic */
1126 pSymFile->aShdrs[i].sh_name = offStrTab;
1127 APPEND_STR(".dynamic");
1128 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1129 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1130 pSymFile->aShdrs[i].sh_offset
1131 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1132 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1133 pSymFile->aShdrs[i].sh_link = iShStrTab;
1134 pSymFile->aShdrs[i].sh_info = 0;
1135 pSymFile->aShdrs[i].sh_addralign = 1;
1136 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1137 i++;
1138# endif
1139
1140 /* Section header: .text */
1141 unsigned const iShText = i;
1142 pSymFile->aShdrs[i].sh_name = offStrTab;
1143 APPEND_STR(".text");
1144 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1145 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1146# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1147 pSymFile->aShdrs[i].sh_offset
1148 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1149# else
1150 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1151 pSymFile->aShdrs[i].sh_offset = 0;
1152# endif
1153 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1154 pSymFile->aShdrs[i].sh_link = 0;
1155 pSymFile->aShdrs[i].sh_info = 0;
1156 pSymFile->aShdrs[i].sh_addralign = 1;
1157 pSymFile->aShdrs[i].sh_entsize = 0;
1158 i++;
1159
1160 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1161
1162# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1163 /*
1164 * The program headers:
1165 */
1166 /* Everything in a single LOAD segment: */
1167 i = 0;
1168 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1169 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1170 pSymFile->aPhdrs[i].p_offset
1171 = pSymFile->aPhdrs[i].p_vaddr
1172 = pSymFile->aPhdrs[i].p_paddr = 0;
1173 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1174 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1175 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1176 i++;
1177 /* The .dynamic segment. */
1178 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1179 pSymFile->aPhdrs[i].p_flags = PF_R;
1180 pSymFile->aPhdrs[i].p_offset
1181 = pSymFile->aPhdrs[i].p_vaddr
1182 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1183 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1184 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1185 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1186 i++;
1187
1188 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1189
1190 /*
1191 * The dynamic section:
1192 */
1193 i = 0;
1194 pSymFile->aDyn[i].d_tag = DT_SONAME;
1195 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1196 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1197 i++;
1198 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1199 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1200 i++;
1201 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1202 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1205 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1206 i++;
1207 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1208 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_NULL;
1211 i++;
1212 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1213# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1214
1215 /*
1216 * Symbol tables:
1217 */
1218 /** @todo gdb doesn't seem to really like this ... */
1219 i = 0;
1220 pSymFile->aSymbols[i].st_name = 0;
1221 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1222 pSymFile->aSymbols[i].st_value = 0;
1223 pSymFile->aSymbols[i].st_size = 0;
1224 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1225 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1226# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1227 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1228# endif
1229 i++;
1230
1231 pSymFile->aSymbols[i].st_name = 0;
1232 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1233 pSymFile->aSymbols[i].st_value = 0;
1234 pSymFile->aSymbols[i].st_size = 0;
1235 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1236 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = offStrTab;
1240 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1241# if 0
1242 pSymFile->aSymbols[i].st_shndx = iShText;
1243 pSymFile->aSymbols[i].st_value = 0;
1244# else
1245 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1246 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1247# endif
1248 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1252 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1253 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1254# endif
1255 i++;
1256
1257 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1258 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1259
1260 /*
1261 * The GDB JIT entry and informing GDB.
1262 */
1263 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1264# if 1
1265 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1266# else
1267 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1268# endif
1269
1270 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1271 RTCritSectEnter(&g_IemNativeGdbJitLock);
1272 pEhFrame->GdbJitEntry.pNext = NULL;
1273 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1274 if (__jit_debug_descriptor.pTail)
1275 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1276 else
1277 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1278 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1279 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1280
1281 /* Notify GDB: */
1282 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1283 __jit_debug_register_code();
1284 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1285 RTCritSectLeave(&g_IemNativeGdbJitLock);
1286
1287# else /* !IEMNATIVE_USE_GDB_JIT */
1288 RT_NOREF(pVCpu);
1289# endif /* !IEMNATIVE_USE_GDB_JIT */
1290
1291 return VINF_SUCCESS;
1292}
1293
1294# endif /* !RT_OS_WINDOWS */
1295#endif /* IN_RING3 */
1296
1297
1298/**
1299 * Adds another chunk to the executable memory allocator.
1300 *
1301 * This is used by the init code for the initial allocation and later by the
1302 * regular allocator function when it's out of memory.
1303 */
1304static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1305{
1306 /* Check that we've room for growth. */
1307 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1308 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1309
1310 /* Allocate a chunk. */
1311#ifdef RT_OS_DARWIN
1312 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1313#else
1314 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1315#endif
1316 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1317
1318#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1319 int rc = VINF_SUCCESS;
1320#else
1321 /* Initialize the heap for the chunk. */
1322 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1323 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1324 AssertRC(rc);
1325 if (RT_SUCCESS(rc))
1326 {
1327 /*
1328 * We want the memory to be aligned on 64 byte, so the first time thru
1329 * here we do some exploratory allocations to see how we can achieve this.
1330 * On subsequent runs we only make an initial adjustment allocation, if
1331 * necessary.
1332 *
1333 * Since we own the heap implementation, we know that the internal block
1334 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1335 * so all we need to wrt allocation size adjustments is to add 32 bytes
1336 * to the size, align up by 64 bytes, and subtract 32 bytes.
1337 *
1338 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1339 * which mean 64 bytes on a 64-bit system, so we need to make a 64 byte
1340 * allocation to force subsequent allocations to return 64 byte aligned
1341 * user areas.
1342 */
1343 if (!pExecMemAllocator->cbHeapBlockHdr)
1344 {
1345 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1346 pExecMemAllocator->cbHeapAlignTweak = 64;
1347 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1348 32 /*cbAlignment*/);
1349 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1350
1351 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1352 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1353 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1354 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1355 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1356
1357 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1358 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1359 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1360 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1361 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1362
1363 RTHeapSimpleFree(hHeap, pvTest2);
1364 RTHeapSimpleFree(hHeap, pvTest1);
1365 }
1366 else
1367 {
1368 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1369 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1370 }
1371 if (RT_SUCCESS(rc))
1372#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1373 {
1374 /*
1375 * Add the chunk.
1376 *
1377 * This must be done before the unwind init so windows can allocate
1378 * memory from the chunk when using the alternative sub-allocator.
1379 */
1380 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1381#ifdef IN_RING3
1382 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1383#endif
1384#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1385 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1386#else
1387 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1388 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1389 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1390 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1391#endif
1392
1393 pExecMemAllocator->cChunks = idxChunk + 1;
1394 pExecMemAllocator->idxChunkHint = idxChunk;
1395
1396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1397 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1398 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1399#else
1400 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1401 pExecMemAllocator->cbTotal += cbFree;
1402 pExecMemAllocator->cbFree += cbFree;
1403#endif
1404
1405#ifdef IN_RING3
1406 /*
1407 * Initialize the unwind information (this cannot really fail atm).
1408 * (This sets pvUnwindInfo.)
1409 */
1410 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1411 if (RT_SUCCESS(rc))
1412#endif
1413 {
1414 return VINF_SUCCESS;
1415 }
1416
1417#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1418 /* Just in case the impossible happens, undo the above up: */
1419 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1420 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1421 pExecMemAllocator->cChunks = idxChunk;
1422 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1423 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1424 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1425 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1426#endif
1427 }
1428#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1429 }
1430#endif
1431 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1432 RT_NOREF(pVCpu);
1433 return rc;
1434}
1435
1436
1437/**
1438 * Initializes the executable memory allocator for native recompilation on the
1439 * calling EMT.
1440 *
1441 * @returns VBox status code.
1442 * @param pVCpu The cross context virtual CPU structure of the calling
1443 * thread.
1444 * @param cbMax The max size of the allocator.
1445 * @param cbInitial The initial allocator size.
1446 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1447 * dependent).
1448 */
1449int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1450{
1451 /*
1452 * Validate input.
1453 */
1454 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1455 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1456 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1457 || cbChunk == 0
1458 || ( RT_IS_POWER_OF_TWO(cbChunk)
1459 && cbChunk >= _1M
1460 && cbChunk <= _256M
1461 && cbChunk <= cbMax),
1462 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1463 VERR_OUT_OF_RANGE);
1464
1465 /*
1466 * Adjust/figure out the chunk size.
1467 */
1468 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1469 {
1470 if (cbMax >= _256M)
1471 cbChunk = _64M;
1472 else
1473 {
1474 if (cbMax < _16M)
1475 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1476 else
1477 cbChunk = (uint32_t)cbMax / 4;
1478 if (!RT_IS_POWER_OF_TWO(cbChunk))
1479 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1480 }
1481 }
1482
1483 if (cbChunk > cbMax)
1484 cbMax = cbChunk;
1485 else
1486 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1487 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1488 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1489
1490 /*
1491 * Allocate and initialize the allocatore instance.
1492 */
1493 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1494#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1495 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1496 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1497 cbNeeded += cbBitmap * cMaxChunks;
1498 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1499 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1500#endif
1501#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1502 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1503 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1504#endif
1505 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1506 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1507 VERR_NO_MEMORY);
1508 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1509 pExecMemAllocator->cbChunk = cbChunk;
1510 pExecMemAllocator->cMaxChunks = cMaxChunks;
1511 pExecMemAllocator->cChunks = 0;
1512 pExecMemAllocator->idxChunkHint = 0;
1513 pExecMemAllocator->cAllocations = 0;
1514 pExecMemAllocator->cbTotal = 0;
1515 pExecMemAllocator->cbFree = 0;
1516 pExecMemAllocator->cbAllocated = 0;
1517#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1518 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1519 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1520 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1521 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1522#endif
1523#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1524 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1525#endif
1526 for (uint32_t i = 0; i < cMaxChunks; i++)
1527 {
1528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1529 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1530 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1531#else
1532 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1533#endif
1534 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1535#ifdef IN_RING0
1536 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1537#else
1538 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1539#endif
1540 }
1541 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1542
1543 /*
1544 * Do the initial allocations.
1545 */
1546 while (cbInitial < (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1547 {
1548 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1549 AssertLogRelRCReturn(rc, rc);
1550 }
1551
1552 pExecMemAllocator->idxChunkHint = 0;
1553
1554 return VINF_SUCCESS;
1555}
1556
1557
1558/*********************************************************************************************************************************
1559* Native Recompilation *
1560*********************************************************************************************************************************/
1561
1562
1563/**
1564 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1565 */
1566IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1567{
1568 pVCpu->iem.s.cInstructions += idxInstr;
1569 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1570}
1571
1572
1573/**
1574 * Used by TB code when it wants to raise a \#GP(0).
1575 */
1576IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1577{
1578#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1579 pVCpu->iem.s.idxTbCurInstr = idxInstr;
1580#else
1581 RT_NOREF(idxInstr);
1582#endif
1583 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1584#ifndef _MSC_VER
1585 return VINF_IEM_RAISED_XCPT; /* not reached */
1586#endif
1587}
1588
1589
1590/*********************************************************************************************************************************
1591* Helpers: Segmented memory fetches and stores. *
1592*********************************************************************************************************************************/
1593
1594/**
1595 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1596 */
1597IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1598{
1599 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1600}
1601
1602
1603/**
1604 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1605 * to 16 bits.
1606 */
1607IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1608{
1609 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1610}
1611
1612
1613/**
1614 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1615 * to 32 bits.
1616 */
1617IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1618{
1619 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1620}
1621
1622/**
1623 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1624 * to 64 bits.
1625 */
1626IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1627{
1628 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1629}
1630
1631
1632/**
1633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1634 */
1635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1636{
1637 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1638}
1639
1640
1641/**
1642 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1643 * to 32 bits.
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1646{
1647 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1648}
1649
1650
1651/**
1652 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1653 * to 64 bits.
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1656{
1657 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1658}
1659
1660
1661/**
1662 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1663 */
1664IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1665{
1666 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1667}
1668
1669
1670/**
1671 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1672 * to 64 bits.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1675{
1676 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1677}
1678
1679
1680/**
1681 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1682 */
1683IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1684{
1685 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1686}
1687
1688
1689/**
1690 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1691 */
1692IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1693{
1694 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1695}
1696
1697
1698/**
1699 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1702{
1703 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1704}
1705
1706
1707/**
1708 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1709 */
1710IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1711{
1712 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1713}
1714
1715
1716/**
1717 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1718 */
1719IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1720{
1721 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1722}
1723
1724
1725
1726/*********************************************************************************************************************************
1727* Helpers: Flat memory fetches and stores. *
1728*********************************************************************************************************************************/
1729
1730/**
1731 * Used by TB code to load unsigned 8-bit data w/ flat address.
1732 * @note Zero extending the value to 64-bit to simplify assembly.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1735{
1736 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1737}
1738
1739
1740/**
1741 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1742 * to 16 bits.
1743 * @note Zero extending the value to 64-bit to simplify assembly.
1744 */
1745IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1746{
1747 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1753 * to 32 bits.
1754 * @note Zero extending the value to 64-bit to simplify assembly.
1755 */
1756IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1757{
1758 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1759}
1760
1761
1762/**
1763 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1764 * to 64 bits.
1765 */
1766IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1767{
1768 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1769}
1770
1771
1772/**
1773 * Used by TB code to load unsigned 16-bit data w/ flat address.
1774 * @note Zero extending the value to 64-bit to simplify assembly.
1775 */
1776IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1777{
1778 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1779}
1780
1781
1782/**
1783 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1784 * to 32 bits.
1785 * @note Zero extending the value to 64-bit to simplify assembly.
1786 */
1787IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1788{
1789 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1790}
1791
1792
1793/**
1794 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1795 * to 64 bits.
1796 * @note Zero extending the value to 64-bit to simplify assembly.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1799{
1800 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1801}
1802
1803
1804/**
1805 * Used by TB code to load unsigned 32-bit data w/ flat address.
1806 * @note Zero extending the value to 64-bit to simplify assembly.
1807 */
1808IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1809{
1810 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1811}
1812
1813
1814/**
1815 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1816 * to 64 bits.
1817 * @note Zero extending the value to 64-bit to simplify assembly.
1818 */
1819IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1820{
1821 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1822}
1823
1824
1825/**
1826 * Used by TB code to load unsigned 64-bit data w/ flat address.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1829{
1830 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1831}
1832
1833
1834/**
1835 * Used by TB code to store unsigned 8-bit data w/ flat address.
1836 */
1837IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1838{
1839 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1840}
1841
1842
1843/**
1844 * Used by TB code to store unsigned 16-bit data w/ flat address.
1845 */
1846IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1847{
1848 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1849}
1850
1851
1852/**
1853 * Used by TB code to store unsigned 32-bit data w/ flat address.
1854 */
1855IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1856{
1857 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 64-bit data w/ flat address.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1865{
1866 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1867}
1868
1869
1870/*********************************************************************************************************************************
1871* Helpers: Segmented memory mapping. *
1872*********************************************************************************************************************************/
1873
1874/**
1875 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1878 RTGCPTR GCPtrMem, uint8_t iSegReg))
1879{
1880 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RwSafeJmp */
1881}
1882
1883
1884/**
1885 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1886 */
1887IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1888 RTGCPTR GCPtrMem, uint8_t iSegReg))
1889{
1890 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8WoSafeJmp */
1891}
1892
1893
1894/**
1895 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1896 */
1897IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1898 RTGCPTR GCPtrMem, uint8_t iSegReg))
1899{
1900 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RoSafeJmp */
1901}
1902
1903
1904/**
1905 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1908 RTGCPTR GCPtrMem, uint8_t iSegReg))
1909{
1910 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RwSafeJmp */
1911}
1912
1913
1914/**
1915 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1916 */
1917IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1918 RTGCPTR GCPtrMem, uint8_t iSegReg))
1919{
1920 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16WoSafeJmp */
1921}
1922
1923
1924/**
1925 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1926 */
1927IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1928 RTGCPTR GCPtrMem, uint8_t iSegReg))
1929{
1930 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RoSafeJmp */
1931}
1932
1933
1934/**
1935 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1936 */
1937IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1938 RTGCPTR GCPtrMem, uint8_t iSegReg))
1939{
1940 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RwSafeJmp */
1941}
1942
1943
1944/**
1945 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1946 */
1947IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1948 RTGCPTR GCPtrMem, uint8_t iSegReg))
1949{
1950 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32WoSafeJmp */
1951}
1952
1953
1954/**
1955 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1956 */
1957IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1958 RTGCPTR GCPtrMem, uint8_t iSegReg))
1959{
1960 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RoSafeJmp */
1961}
1962
1963
1964/**
1965 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1966 */
1967IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1968 RTGCPTR GCPtrMem, uint8_t iSegReg))
1969{
1970 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RwSafeJmp */
1971}
1972
1973
1974/**
1975 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1976 */
1977IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1978 RTGCPTR GCPtrMem, uint8_t iSegReg))
1979{
1980 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64WoSafeJmp */
1981}
1982
1983
1984/**
1985 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1986 */
1987IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1988 RTGCPTR GCPtrMem, uint8_t iSegReg))
1989{
1990 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RoSafeJmp */
1991}
1992
1993
1994/**
1995 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1996 */
1997IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1998 RTGCPTR GCPtrMem, uint8_t iSegReg))
1999{
2000 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataR80WoSafeJmp */
2001}
2002
2003
2004/**
2005 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2006 */
2007IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2008 RTGCPTR GCPtrMem, uint8_t iSegReg))
2009{
2010 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataD80WoSafeJmp */
2011}
2012
2013
2014/**
2015 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2016 */
2017IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2018 RTGCPTR GCPtrMem, uint8_t iSegReg))
2019{
2020 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RwSafeJmp */
2021}
2022
2023
2024/**
2025 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2026 */
2027IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2028 RTGCPTR GCPtrMem, uint8_t iSegReg))
2029{
2030 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128WoSafeJmp */
2031}
2032
2033
2034/**
2035 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2036 */
2037IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2038 RTGCPTR GCPtrMem, uint8_t iSegReg))
2039{
2040 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RoSafeJmp */
2041}
2042
2043
2044/*********************************************************************************************************************************
2045* Helpers: Flat memory mapping. *
2046*********************************************************************************************************************************/
2047
2048/**
2049 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2050 */
2051IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2052{
2053 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RwSafeJmp */
2054}
2055
2056
2057/**
2058 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2059 */
2060IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2061{
2062 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8WoSafeJmp */
2063}
2064
2065
2066/**
2067 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2068 */
2069IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2070{
2071 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RoSafeJmp */
2072}
2073
2074
2075/**
2076 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2079{
2080 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RwSafeJmp */
2081}
2082
2083
2084/**
2085 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2086 */
2087IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2088{
2089 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16WoSafeJmp */
2090}
2091
2092
2093/**
2094 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2095 */
2096IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2097{
2098 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RoSafeJmp */
2099}
2100
2101
2102/**
2103 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2104 */
2105IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2106{
2107 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RwSafeJmp */
2108}
2109
2110
2111/**
2112 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2113 */
2114IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2115{
2116 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32WoSafeJmp */
2117}
2118
2119
2120/**
2121 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2122 */
2123IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2124{
2125 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RoSafeJmp */
2126}
2127
2128
2129/**
2130 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2131 */
2132IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2133{
2134 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RwSafeJmp */
2135}
2136
2137
2138/**
2139 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2140 */
2141IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2142{
2143 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64WoSafeJmp */
2144}
2145
2146
2147/**
2148 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2149 */
2150IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2151{
2152 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RoSafeJmp */
2153}
2154
2155
2156/**
2157 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2160{
2161 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataR80WoSafeJmp */
2162}
2163
2164
2165/**
2166 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2167 */
2168IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2169{
2170 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataD80WoSafeJmp */
2171}
2172
2173
2174/**
2175 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2176 */
2177IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2178{
2179 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RwSafeJmp */
2180}
2181
2182
2183/**
2184 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2187{
2188 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128WoSafeJmp */
2189}
2190
2191
2192/**
2193 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2194 */
2195IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2196{
2197 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RoSafeJmp */
2198}
2199
2200
2201/*********************************************************************************************************************************
2202* Helpers: Commit, rollback & unmap *
2203*********************************************************************************************************************************/
2204
2205/**
2206 * Used by TB code to commit and unmap a read-write memory mapping.
2207 */
2208IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2209{
2210 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2211}
2212
2213
2214/**
2215 * Used by TB code to commit and unmap a write-only memory mapping.
2216 */
2217IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2218{
2219 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2220}
2221
2222
2223/**
2224 * Used by TB code to commit and unmap a read-only memory mapping.
2225 */
2226IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2227{
2228 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2229}
2230
2231
2232/**
2233 * Reinitializes the native recompiler state.
2234 *
2235 * Called before starting a new recompile job.
2236 */
2237static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2238{
2239 pReNative->cLabels = 0;
2240 pReNative->bmLabelTypes = 0;
2241 pReNative->cFixups = 0;
2242#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2243 pReNative->pDbgInfo->cEntries = 0;
2244#endif
2245 pReNative->pTbOrg = pTb;
2246 pReNative->cCondDepth = 0;
2247 pReNative->uCondSeqNo = 0;
2248 pReNative->uCheckIrqSeqNo = 0;
2249 pReNative->uTlbSeqNo = 0;
2250
2251 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2252#if IEMNATIVE_HST_GREG_COUNT < 32
2253 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2254#endif
2255 ;
2256 pReNative->Core.bmHstRegsWithGstShadow = 0;
2257 pReNative->Core.bmGstRegShadows = 0;
2258 pReNative->Core.bmVars = 0;
2259 pReNative->Core.bmStack = 0;
2260 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2261 pReNative->Core.u64ArgVars = UINT64_MAX;
2262
2263 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 6);
2264 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2265 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2266 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2267 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2268 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2269 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2270
2271 /* Full host register reinit: */
2272 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2273 {
2274 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2275 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2276 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2277 }
2278
2279 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2280 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2281#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2282 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2283#endif
2284#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2285 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2286#endif
2287 );
2288 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2289 {
2290 fRegs &= ~RT_BIT_32(idxReg);
2291 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_FixedReserved;
2292 }
2293
2294 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2295#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2296 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2297#endif
2298#ifdef IEMNATIVE_REG_FIXED_TMP0
2299 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2300#endif
2301 return pReNative;
2302}
2303
2304
2305/**
2306 * Allocates and initializes the native recompiler state.
2307 *
2308 * This is called the first time an EMT wants to recompile something.
2309 *
2310 * @returns Pointer to the new recompiler state.
2311 * @param pVCpu The cross context virtual CPU structure of the calling
2312 * thread.
2313 * @param pTb The TB that's about to be recompiled.
2314 * @thread EMT(pVCpu)
2315 */
2316static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2317{
2318 VMCPU_ASSERT_EMT(pVCpu);
2319
2320 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2321 AssertReturn(pReNative, NULL);
2322
2323 /*
2324 * Try allocate all the buffers and stuff we need.
2325 */
2326 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2327 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2328 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2329#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2330 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2331#endif
2332 if (RT_LIKELY( pReNative->pInstrBuf
2333 && pReNative->paLabels
2334 && pReNative->paFixups)
2335#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2336 && pReNative->pDbgInfo
2337#endif
2338 )
2339 {
2340 /*
2341 * Set the buffer & array sizes on success.
2342 */
2343 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2344 pReNative->cLabelsAlloc = _8K;
2345 pReNative->cFixupsAlloc = _16K;
2346#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2347 pReNative->cDbgInfoAlloc = _16K;
2348#endif
2349
2350 /*
2351 * Done, just need to save it and reinit it.
2352 */
2353 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2354 return iemNativeReInit(pReNative, pTb);
2355 }
2356
2357 /*
2358 * Failed. Cleanup and return.
2359 */
2360 AssertFailed();
2361 RTMemFree(pReNative->pInstrBuf);
2362 RTMemFree(pReNative->paLabels);
2363 RTMemFree(pReNative->paFixups);
2364#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2365 RTMemFree(pReNative->pDbgInfo);
2366#endif
2367 RTMemFree(pReNative);
2368 return NULL;
2369}
2370
2371
2372/**
2373 * Creates a label
2374 *
2375 * If the label does not yet have a defined position,
2376 * call iemNativeLabelDefine() later to set it.
2377 *
2378 * @returns Label ID. Throws VBox status code on failure, so no need to check
2379 * the return value.
2380 * @param pReNative The native recompile state.
2381 * @param enmType The label type.
2382 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2383 * label is not yet defined (default).
2384 * @param uData Data associated with the lable. Only applicable to
2385 * certain type of labels. Default is zero.
2386 */
2387DECL_HIDDEN_THROW(uint32_t)
2388iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2389 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2390{
2391 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2392
2393 /*
2394 * Locate existing label definition.
2395 *
2396 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2397 * and uData is zero.
2398 */
2399 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2400 uint32_t const cLabels = pReNative->cLabels;
2401 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2402#ifndef VBOX_STRICT
2403 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2404 && offWhere == UINT32_MAX
2405 && uData == 0
2406#endif
2407 )
2408 {
2409#ifndef VBOX_STRICT
2410 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2411 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2412 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2413 if (idxLabel < pReNative->cLabels)
2414 return idxLabel;
2415#else
2416 for (uint32_t i = 0; i < cLabels; i++)
2417 if ( paLabels[i].enmType == enmType
2418 && paLabels[i].uData == uData)
2419 {
2420 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2421 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2422 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2423 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2425 return i;
2426 }
2427 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2428 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2429#endif
2430 }
2431
2432 /*
2433 * Make sure we've got room for another label.
2434 */
2435 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2436 { /* likely */ }
2437 else
2438 {
2439 uint32_t cNew = pReNative->cLabelsAlloc;
2440 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2441 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2442 cNew *= 2;
2443 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2444 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2445 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2446 pReNative->paLabels = paLabels;
2447 pReNative->cLabelsAlloc = cNew;
2448 }
2449
2450 /*
2451 * Define a new label.
2452 */
2453 paLabels[cLabels].off = offWhere;
2454 paLabels[cLabels].enmType = enmType;
2455 paLabels[cLabels].uData = uData;
2456 pReNative->cLabels = cLabels + 1;
2457
2458 Assert((unsigned)enmType < 64);
2459 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2460
2461 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2462 {
2463 Assert(uData == 0);
2464 pReNative->aidxUniqueLabels[enmType] = cLabels;
2465 }
2466
2467 if (offWhere != UINT32_MAX)
2468 {
2469#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2470 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2471 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2472#endif
2473 }
2474 return cLabels;
2475}
2476
2477
2478/**
2479 * Defines the location of an existing label.
2480 *
2481 * @param pReNative The native recompile state.
2482 * @param idxLabel The label to define.
2483 * @param offWhere The position.
2484 */
2485DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2486{
2487 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2488 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2489 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2490 pLabel->off = offWhere;
2491#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2492 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2493 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2494#endif
2495}
2496
2497
2498/**
2499 * Looks up a lable.
2500 *
2501 * @returns Label ID if found, UINT32_MAX if not.
2502 */
2503static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2504 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2505{
2506 Assert((unsigned)enmType < 64);
2507 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2508 {
2509 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2510 return pReNative->aidxUniqueLabels[enmType];
2511
2512 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2513 uint32_t const cLabels = pReNative->cLabels;
2514 for (uint32_t i = 0; i < cLabels; i++)
2515 if ( paLabels[i].enmType == enmType
2516 && paLabels[i].uData == uData
2517 && ( paLabels[i].off == offWhere
2518 || offWhere == UINT32_MAX
2519 || paLabels[i].off == UINT32_MAX))
2520 return i;
2521 }
2522 return UINT32_MAX;
2523}
2524
2525
2526/**
2527 * Adds a fixup.
2528 *
2529 * @throws VBox status code (int) on failure.
2530 * @param pReNative The native recompile state.
2531 * @param offWhere The instruction offset of the fixup location.
2532 * @param idxLabel The target label ID for the fixup.
2533 * @param enmType The fixup type.
2534 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2535 */
2536DECL_HIDDEN_THROW(void)
2537iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2538 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2539{
2540 Assert(idxLabel <= UINT16_MAX);
2541 Assert((unsigned)enmType <= UINT8_MAX);
2542
2543 /*
2544 * Make sure we've room.
2545 */
2546 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2547 uint32_t const cFixups = pReNative->cFixups;
2548 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2549 { /* likely */ }
2550 else
2551 {
2552 uint32_t cNew = pReNative->cFixupsAlloc;
2553 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2554 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2555 cNew *= 2;
2556 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2557 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2558 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2559 pReNative->paFixups = paFixups;
2560 pReNative->cFixupsAlloc = cNew;
2561 }
2562
2563 /*
2564 * Add the fixup.
2565 */
2566 paFixups[cFixups].off = offWhere;
2567 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2568 paFixups[cFixups].enmType = enmType;
2569 paFixups[cFixups].offAddend = offAddend;
2570 pReNative->cFixups = cFixups + 1;
2571}
2572
2573
2574/**
2575 * Slow code path for iemNativeInstrBufEnsure.
2576 */
2577DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2578{
2579 /* Double the buffer size till we meet the request. */
2580 uint32_t cNew = pReNative->cInstrBufAlloc;
2581 AssertReturn(cNew > 0, NULL);
2582 do
2583 cNew *= 2;
2584 while (cNew < off + cInstrReq);
2585
2586 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2587#ifdef RT_ARCH_ARM64
2588 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2589#else
2590 uint32_t const cbMaxInstrBuf = _2M;
2591#endif
2592 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2593
2594 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2595 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2596
2597 pReNative->cInstrBufAlloc = cNew;
2598 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2599}
2600
2601#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2602
2603/**
2604 * Grows the static debug info array used during recompilation.
2605 *
2606 * @returns Pointer to the new debug info block; throws VBox status code on
2607 * failure, so no need to check the return value.
2608 */
2609DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2610{
2611 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2612 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2613 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2614 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2615 pReNative->pDbgInfo = pDbgInfo;
2616 pReNative->cDbgInfoAlloc = cNew;
2617 return pDbgInfo;
2618}
2619
2620
2621/**
2622 * Adds a new debug info uninitialized entry, returning the pointer to it.
2623 */
2624DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2625{
2626 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2627 { /* likely */ }
2628 else
2629 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2630 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2631}
2632
2633
2634/**
2635 * Debug Info: Adds a native offset record, if necessary.
2636 */
2637static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2638{
2639 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2640
2641 /*
2642 * Search backwards to see if we've got a similar record already.
2643 */
2644 uint32_t idx = pDbgInfo->cEntries;
2645 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2646 while (idx-- > idxStop)
2647 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2648 {
2649 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2650 return;
2651 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2652 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2653 break;
2654 }
2655
2656 /*
2657 * Add it.
2658 */
2659 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2660 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2661 pEntry->NativeOffset.offNative = off;
2662}
2663
2664
2665/**
2666 * Debug Info: Record info about a label.
2667 */
2668static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2669{
2670 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2671 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2672 pEntry->Label.uUnused = 0;
2673 pEntry->Label.enmLabel = (uint8_t)enmType;
2674 pEntry->Label.uData = uData;
2675}
2676
2677
2678/**
2679 * Debug Info: Record info about a threaded call.
2680 */
2681static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2682{
2683 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2684 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2685 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2686 pEntry->ThreadedCall.uUnused = 0;
2687 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2688}
2689
2690
2691/**
2692 * Debug Info: Record info about a new guest instruction.
2693 */
2694static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2695{
2696 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2697 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2698 pEntry->GuestInstruction.uUnused = 0;
2699 pEntry->GuestInstruction.fExec = fExec;
2700}
2701
2702
2703/**
2704 * Debug Info: Record info about guest register shadowing.
2705 */
2706static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2707 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2708{
2709 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2710 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2711 pEntry->GuestRegShadowing.uUnused = 0;
2712 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2713 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2714 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2715}
2716
2717#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2718
2719
2720/*********************************************************************************************************************************
2721* Register Allocator *
2722*********************************************************************************************************************************/
2723
2724/**
2725 * Register parameter indexes (indexed by argument number).
2726 */
2727DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2728{
2729 IEMNATIVE_CALL_ARG0_GREG,
2730 IEMNATIVE_CALL_ARG1_GREG,
2731 IEMNATIVE_CALL_ARG2_GREG,
2732 IEMNATIVE_CALL_ARG3_GREG,
2733#if defined(IEMNATIVE_CALL_ARG4_GREG)
2734 IEMNATIVE_CALL_ARG4_GREG,
2735# if defined(IEMNATIVE_CALL_ARG5_GREG)
2736 IEMNATIVE_CALL_ARG5_GREG,
2737# if defined(IEMNATIVE_CALL_ARG6_GREG)
2738 IEMNATIVE_CALL_ARG6_GREG,
2739# if defined(IEMNATIVE_CALL_ARG7_GREG)
2740 IEMNATIVE_CALL_ARG7_GREG,
2741# endif
2742# endif
2743# endif
2744#endif
2745};
2746
2747/**
2748 * Call register masks indexed by argument count.
2749 */
2750DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2751{
2752 0,
2753 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2754 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2755 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2756 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2757 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2758#if defined(IEMNATIVE_CALL_ARG4_GREG)
2759 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2760 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2761# if defined(IEMNATIVE_CALL_ARG5_GREG)
2762 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2763 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2764# if defined(IEMNATIVE_CALL_ARG6_GREG)
2765 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2766 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2767 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2768# if defined(IEMNATIVE_CALL_ARG7_GREG)
2769 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2770 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2771 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2772# endif
2773# endif
2774# endif
2775#endif
2776};
2777
2778#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2779/**
2780 * BP offset of the stack argument slots.
2781 *
2782 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2783 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2784 */
2785DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2786{
2787 IEMNATIVE_FP_OFF_STACK_ARG0,
2788# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2789 IEMNATIVE_FP_OFF_STACK_ARG1,
2790# endif
2791# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2792 IEMNATIVE_FP_OFF_STACK_ARG2,
2793# endif
2794# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2795 IEMNATIVE_FP_OFF_STACK_ARG3,
2796# endif
2797};
2798AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2799#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2800
2801/**
2802 * Info about shadowed guest register values.
2803 * @see IEMNATIVEGSTREG
2804 */
2805static struct
2806{
2807 /** Offset in VMCPU. */
2808 uint32_t off;
2809 /** The field size. */
2810 uint8_t cb;
2811 /** Name (for logging). */
2812 const char *pszName;
2813} const g_aGstShadowInfo[] =
2814{
2815#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2816 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2817 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2818 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2819 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2820 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2821 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2822 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2823 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2824 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2825 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2826 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2827 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2828 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2829 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2830 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2831 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2832 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2833 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2834 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2835 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2836 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2837 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2838 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2839 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2840 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2841 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2842 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2843 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2844 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2845 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2846 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2847 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2848 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2849 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2850 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2851 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2852#undef CPUMCTX_OFF_AND_SIZE
2853};
2854AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2855
2856
2857/** Host CPU general purpose register names. */
2858DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2859{
2860#ifdef RT_ARCH_AMD64
2861 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2862#elif RT_ARCH_ARM64
2863 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2864 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2865#else
2866# error "port me"
2867#endif
2868};
2869
2870
2871DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2872 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2873{
2874 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2875
2876 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2877 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2878 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2879 return (uint8_t)idxReg;
2880}
2881
2882
2883/**
2884 * Tries to locate a suitable register in the given register mask.
2885 *
2886 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2887 * failed.
2888 *
2889 * @returns Host register number on success, returns UINT8_MAX on failure.
2890 */
2891static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2892{
2893 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2894 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2895 if (fRegs)
2896 {
2897 /** @todo pick better here: */
2898 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2899
2900 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2901 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2902 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2903 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2904
2905 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2906 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2907 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2908 return idxReg;
2909 }
2910 return UINT8_MAX;
2911}
2912
2913
2914/**
2915 * Locate a register, possibly freeing one up.
2916 *
2917 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2918 * failed.
2919 *
2920 * @returns Host register number on success. Returns UINT8_MAX if no registers
2921 * found, the caller is supposed to deal with this and raise a
2922 * allocation type specific status code (if desired).
2923 *
2924 * @throws VBox status code if we're run into trouble spilling a variable of
2925 * recording debug info. Does NOT throw anything if we're out of
2926 * registers, though.
2927 */
2928static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2929 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2930{
2931 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2932 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2933
2934 /*
2935 * Try a freed register that's shadowing a guest register
2936 */
2937 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2938 if (fRegs)
2939 {
2940 unsigned const idxReg = (fPreferVolatile
2941 ? ASMBitFirstSetU32(fRegs)
2942 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2943 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2944 - 1;
2945
2946 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2947 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2948 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2949 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2950
2951 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2952 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2953 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2954 return idxReg;
2955 }
2956
2957 /*
2958 * Try free up a variable that's in a register.
2959 *
2960 * We do two rounds here, first evacuating variables we don't need to be
2961 * saved on the stack, then in the second round move things to the stack.
2962 */
2963 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2964 {
2965 uint32_t fVars = pReNative->Core.bmVars;
2966 while (fVars)
2967 {
2968 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2969 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2970/** @todo Prevent active variables from changing here... */
2971 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2972 && (RT_BIT_32(idxReg) & fRegMask)
2973 && ( iLoop == 0
2974 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2975 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2976 {
2977 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2978 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2979 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2980 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2981 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2982 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2983
2984 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2985 {
2986 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
2987 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2988 }
2989
2990 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2991 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2992
2993 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2994 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2995 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2996 return idxReg;
2997 }
2998 fVars &= ~RT_BIT_32(idxVar);
2999 }
3000 }
3001
3002 return UINT8_MAX;
3003}
3004
3005
3006/**
3007 * Reassigns a variable to a different register specified by the caller.
3008 *
3009 * @returns The new code buffer position.
3010 * @param pReNative The native recompile state.
3011 * @param off The current code buffer position.
3012 * @param idxVar The variable index.
3013 * @param idxRegOld The old host register number.
3014 * @param idxRegNew The new host register number.
3015 * @param pszCaller The caller for logging.
3016 */
3017static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3018 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3019{
3020 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3021 RT_NOREF(pszCaller);
3022
3023 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3024
3025 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3026 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3027 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3028 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3029
3030 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3031 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3032 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3033 if (fGstRegShadows)
3034 {
3035 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3036 | RT_BIT_32(idxRegNew);
3037 while (fGstRegShadows)
3038 {
3039 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3040 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3041
3042 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3043 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3044 }
3045 }
3046
3047 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3048 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3049 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3050 return off;
3051}
3052
3053
3054/**
3055 * Moves a variable to a different register or spills it onto the stack.
3056 *
3057 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3058 * kinds can easily be recreated if needed later.
3059 *
3060 * @returns The new code buffer position.
3061 * @param pReNative The native recompile state.
3062 * @param off The current code buffer position.
3063 * @param idxVar The variable index.
3064 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3065 * call-volatile registers.
3066 */
3067static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3068 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3069{
3070 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
3071 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3072
3073 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3074 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3075 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3076 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3077 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3078 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3079 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3080 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3081 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3082
3083
3084 /** @todo Add statistics on this.*/
3085 /** @todo Implement basic variable liveness analysis (python) so variables
3086 * can be freed immediately once no longer used. This has the potential to
3087 * be trashing registers and stack for dead variables. */
3088
3089 /*
3090 * First try move it to a different register, as that's cheaper.
3091 */
3092 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3093 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3094 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3095 if (fRegs)
3096 {
3097 /* Avoid using shadow registers, if possible. */
3098 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3099 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3100 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3101 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3102 }
3103
3104 /*
3105 * Otherwise we must spill the register onto the stack.
3106 */
3107 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3108 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3109 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3110 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3111
3112 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3113 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3114 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3115 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3116 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3117 return off;
3118}
3119
3120
3121/**
3122 * Allocates a temporary host general purpose register.
3123 *
3124 * This may emit code to save register content onto the stack in order to free
3125 * up a register.
3126 *
3127 * @returns The host register number; throws VBox status code on failure,
3128 * so no need to check the return value.
3129 * @param pReNative The native recompile state.
3130 * @param poff Pointer to the variable with the code buffer position.
3131 * This will be update if we need to move a variable from
3132 * register to stack in order to satisfy the request.
3133 * @param fPreferVolatile Wheter to prefer volatile over non-volatile
3134 * registers (@c true, default) or the other way around
3135 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3136 */
3137DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3138{
3139 /*
3140 * Try find a completely unused register, preferably a call-volatile one.
3141 */
3142 uint8_t idxReg;
3143 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3144 & ~pReNative->Core.bmHstRegsWithGstShadow
3145 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3146 if (fRegs)
3147 {
3148 if (fPreferVolatile)
3149 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3150 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3151 else
3152 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3153 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3154 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3155 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3156 }
3157 else
3158 {
3159 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3160 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3161 }
3162 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3163}
3164
3165
3166/**
3167 * Allocates a temporary register for loading an immediate value into.
3168 *
3169 * This will emit code to load the immediate, unless there happens to be an
3170 * unused register with the value already loaded.
3171 *
3172 * The caller will not modify the returned register, it must be considered
3173 * read-only. Free using iemNativeRegFreeTmpImm.
3174 *
3175 * @returns The host register number; throws VBox status code on failure, so no
3176 * need to check the return value.
3177 * @param pReNative The native recompile state.
3178 * @param poff Pointer to the variable with the code buffer position.
3179 * @param uImm The immediate value that the register must hold upon
3180 * return.
3181 * @param fPreferVolatile Wheter to prefer volatile over non-volatile
3182 * registers (@c true, default) or the other way around
3183 * (@c false).
3184 *
3185 * @note Reusing immediate values has not been implemented yet.
3186 */
3187DECL_HIDDEN_THROW(uint8_t)
3188iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3189{
3190 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3191 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3192 return idxReg;
3193}
3194
3195
3196/**
3197 * Marks host register @a idxHstReg as containing a shadow copy of guest
3198 * register @a enmGstReg.
3199 *
3200 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
3201 * host register before calling.
3202 */
3203DECL_FORCE_INLINE(void)
3204iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3205{
3206 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3207 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3208 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3209
3210 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3211 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3212 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3213 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3214#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3215 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3216 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3217#else
3218 RT_NOREF(off);
3219#endif
3220}
3221
3222
3223/**
3224 * Clear any guest register shadow claims from @a idxHstReg.
3225 *
3226 * The register does not need to be shadowing any guest registers.
3227 */
3228DECL_FORCE_INLINE(void)
3229iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3230{
3231 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3232 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3233 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3234 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3235 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3236
3237#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3238 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3239 if (fGstRegs)
3240 {
3241 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3242 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3243 while (fGstRegs)
3244 {
3245 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3246 fGstRegs &= ~RT_BIT_64(iGstReg);
3247 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3248 }
3249 }
3250#else
3251 RT_NOREF(off);
3252#endif
3253
3254 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3255 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3256 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3257}
3258
3259
3260/**
3261 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3262 * and global overview flags.
3263 */
3264DECL_FORCE_INLINE(void)
3265iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3266{
3267 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3268 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3269 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3270 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3271 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3272 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3273 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3274
3275#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3276 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3277 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3278#else
3279 RT_NOREF(off);
3280#endif
3281
3282 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3283 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3284 if (!fGstRegShadowsNew)
3285 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3286 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3287}
3288
3289
3290/**
3291 * Clear any guest register shadow claim for @a enmGstReg.
3292 */
3293DECL_FORCE_INLINE(void)
3294iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3295{
3296 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3297 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3298 {
3299 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3300 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3301 }
3302}
3303
3304
3305/**
3306 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3307 * as the new shadow of it.
3308 */
3309DECL_FORCE_INLINE(void)
3310iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3311 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3312{
3313 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3314 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3315 {
3316 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3317 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3318 return;
3319 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3320 }
3321 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3322}
3323
3324
3325/**
3326 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3327 * to @a idxRegTo.
3328 */
3329DECL_FORCE_INLINE(void)
3330iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3331 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3332{
3333 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3334 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
3335 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3336 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3337 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3338 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3339 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3340 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3341 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3342
3343 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3344 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3345 if (!fGstRegShadowsFrom)
3346 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3347 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3348 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3349 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3350#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3351 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3352 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3353#else
3354 RT_NOREF(off);
3355#endif
3356}
3357
3358
3359/**
3360 * Allocates a temporary host general purpose register for keeping a guest
3361 * register value.
3362 *
3363 * Since we may already have a register holding the guest register value,
3364 * code will be emitted to do the loading if that's not the case. Code may also
3365 * be emitted if we have to free up a register to satify the request.
3366 *
3367 * @returns The host register number; throws VBox status code on failure, so no
3368 * need to check the return value.
3369 * @param pReNative The native recompile state.
3370 * @param poff Pointer to the variable with the code buffer
3371 * position. This will be update if we need to move a
3372 * variable from register to stack in order to satisfy
3373 * the request.
3374 * @param enmGstReg The guest register that will is to be updated.
3375 * @param enmIntendedUse How the caller will be using the host register.
3376 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3377 */
3378DECL_HIDDEN_THROW(uint8_t)
3379iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3380 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
3381{
3382 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3383#ifdef LOG_ENABLED
3384 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3385#endif
3386
3387 /*
3388 * First check if the guest register value is already in a host register.
3389 */
3390 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3391 {
3392 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3393 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3394 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3395 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3396
3397 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3398 {
3399 /*
3400 * If the register will trash the guest shadow copy, try find a
3401 * completely unused register we can use instead. If that fails,
3402 * we need to disassociate the host reg from the guest reg.
3403 */
3404 /** @todo would be nice to know if preserving the register is in any way helpful. */
3405 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3406 && ( ~pReNative->Core.bmHstRegs
3407 & ~pReNative->Core.bmHstRegsWithGstShadow
3408 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3409 {
3410 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
3411
3412 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3413
3414 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3415 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3416 g_apszIemNativeHstRegNames[idxRegNew]));
3417 idxReg = idxRegNew;
3418 }
3419 else
3420 {
3421 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3422 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3423 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3424 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3425 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3426 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3427 else
3428 {
3429 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3430 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3431 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3432 }
3433 }
3434 }
3435 else
3436 {
3437 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3438 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3439 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3440 idxReg, s_pszIntendedUse[enmIntendedUse]));
3441
3442 /*
3443 * Allocate a new register, copy the value and, if updating, the
3444 * guest shadow copy assignment to the new register.
3445 */
3446 /** @todo share register for readonly access. */
3447 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3448
3449 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3450 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3451
3452 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3453 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3454 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3455 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3456 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3457 else
3458 {
3459 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3460 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3461 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3462 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3463 }
3464 idxReg = idxRegNew;
3465 }
3466
3467#ifdef VBOX_STRICT
3468 /* Strict builds: Check that the value is correct. */
3469 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3470#endif
3471
3472 return idxReg;
3473 }
3474
3475 /*
3476 * Allocate a new register, load it with the guest value and designate it as a copy of the
3477 */
3478 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3479
3480 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3481 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3482
3483 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3484 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3485 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3486 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3487
3488 return idxRegNew;
3489}
3490
3491
3492/**
3493 * Allocates a temporary host general purpose register that already holds the
3494 * given guest register value.
3495 *
3496 * The use case for this function is places where the shadowing state cannot be
3497 * modified due to branching and such. This will fail if the we don't have a
3498 * current shadow copy handy or if it's incompatible. The only code that will
3499 * be emitted here is value checking code in strict builds.
3500 *
3501 * The intended use can only be readonly!
3502 *
3503 * @returns The host register number, UINT8_MAX if not present.
3504 * @param pReNative The native recompile state.
3505 * @param poff Pointer to the instruction buffer offset.
3506 * Will be updated in strict builds if a register is
3507 * found.
3508 * @param enmGstReg The guest register that will is to be updated.
3509 * @note In strict builds, this may throw instruction buffer growth failures.
3510 * Non-strict builds will not throw anything.
3511 * @sa iemNativeRegAllocTmpForGuestReg
3512 */
3513DECL_HIDDEN_THROW(uint8_t)
3514iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3515{
3516 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3517
3518 /*
3519 * First check if the guest register value is already in a host register.
3520 */
3521 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3522 {
3523 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3524 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3525 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3526 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3527
3528 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3529 {
3530 /*
3531 * We only do readonly use here, so easy compared to the other
3532 * variant of this code.
3533 */
3534 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3535 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3536 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3537 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3538 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3539
3540#ifdef VBOX_STRICT
3541 /* Strict builds: Check that the value is correct. */
3542 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3543#else
3544 RT_NOREF(poff);
3545#endif
3546 return idxReg;
3547 }
3548 }
3549
3550 return UINT8_MAX;
3551}
3552
3553
3554DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
3555
3556
3557/**
3558 * Allocates argument registers for a function call.
3559 *
3560 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3561 * need to check the return value.
3562 * @param pReNative The native recompile state.
3563 * @param off The current code buffer offset.
3564 * @param cArgs The number of arguments the function call takes.
3565 */
3566DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3567{
3568 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3569 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3570 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3571 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3572
3573 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3574 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3575 else if (cArgs == 0)
3576 return true;
3577
3578 /*
3579 * Do we get luck and all register are free and not shadowing anything?
3580 */
3581 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3582 for (uint32_t i = 0; i < cArgs; i++)
3583 {
3584 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3585 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3586 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3587 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3588 }
3589 /*
3590 * Okay, not lucky so we have to free up the registers.
3591 */
3592 else
3593 for (uint32_t i = 0; i < cArgs; i++)
3594 {
3595 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3596 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3597 {
3598 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3599 {
3600 case kIemNativeWhat_Var:
3601 {
3602 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3603 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
3604 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3605 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3606 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3607
3608 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3609 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3610 else
3611 {
3612 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3613 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3614 }
3615 break;
3616 }
3617
3618 case kIemNativeWhat_Tmp:
3619 case kIemNativeWhat_Arg:
3620 case kIemNativeWhat_rc:
3621 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3622 default:
3623 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3624 }
3625
3626 }
3627 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3628 {
3629 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3630 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3631 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3632 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3633 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3634 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3635 }
3636 else
3637 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3638 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3639 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3640 }
3641 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3642 return true;
3643}
3644
3645
3646DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3647
3648
3649#if 0
3650/**
3651 * Frees a register assignment of any type.
3652 *
3653 * @param pReNative The native recompile state.
3654 * @param idxHstReg The register to free.
3655 *
3656 * @note Does not update variables.
3657 */
3658DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3659{
3660 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3661 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3662 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3663 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3664 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3665 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3666 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3667 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3668 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3669 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3670 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3671 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3672 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3673 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3674
3675 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3676 /* no flushing, right:
3677 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3678 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3679 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3680 */
3681}
3682#endif
3683
3684
3685/**
3686 * Frees a temporary register.
3687 *
3688 * Any shadow copies of guest registers assigned to the host register will not
3689 * be flushed by this operation.
3690 */
3691DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3692{
3693 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3694 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3695 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3696 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3697 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3698}
3699
3700
3701/**
3702 * Frees a temporary immediate register.
3703 *
3704 * It is assumed that the call has not modified the register, so it still hold
3705 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3706 */
3707DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3708{
3709 iemNativeRegFreeTmp(pReNative, idxHstReg);
3710}
3711
3712
3713/**
3714 * Frees a register assigned to a variable.
3715 *
3716 * The register will be disassociated from the variable.
3717 */
3718DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3719{
3720 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3721 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3722 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3723 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3724 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
3725
3726 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3727 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3728 if (!fFlushShadows)
3729 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
3730 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3731 else
3732 {
3733 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3734 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3735 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3736 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3737 uint64_t fGstRegShadows = fGstRegShadowsOld;
3738 while (fGstRegShadows)
3739 {
3740 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3741 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3742
3743 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3744 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3745 }
3746 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
3747 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3748 }
3749}
3750
3751
3752/**
3753 * Called right before emitting a call instruction to move anything important
3754 * out of call-volatile registers, free and flush the call-volatile registers,
3755 * optionally freeing argument variables.
3756 *
3757 * @returns New code buffer offset, UINT32_MAX on failure.
3758 * @param pReNative The native recompile state.
3759 * @param off The code buffer offset.
3760 * @param cArgs The number of arguments the function call takes.
3761 * It is presumed that the host register part of these have
3762 * been allocated as such already and won't need moving,
3763 * just freeing.
3764 */
3765DECL_HIDDEN_THROW(uint32_t)
3766iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3767{
3768 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
3769
3770 /*
3771 * Move anything important out of volatile registers.
3772 */
3773 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3774 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3775 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
3776#ifdef IEMNATIVE_REG_FIXED_TMP0
3777 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3778#endif
3779 & ~g_afIemNativeCallRegs[cArgs];
3780
3781 fRegsToMove &= pReNative->Core.bmHstRegs;
3782 if (!fRegsToMove)
3783 { /* likely */ }
3784 else
3785 {
3786 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
3787 while (fRegsToMove != 0)
3788 {
3789 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
3790 fRegsToMove &= ~RT_BIT_32(idxReg);
3791
3792 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3793 {
3794 case kIemNativeWhat_Var:
3795 {
3796 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3797 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
3798 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3799 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3800 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
3801 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
3802 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3803 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3804 else
3805 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3806 continue;
3807 }
3808
3809 case kIemNativeWhat_Arg:
3810 AssertMsgFailed(("What?!?: %u\n", idxReg));
3811 continue;
3812
3813 case kIemNativeWhat_rc:
3814 case kIemNativeWhat_Tmp:
3815 AssertMsgFailed(("Missing free: %u\n", idxReg));
3816 continue;
3817
3818 case kIemNativeWhat_FixedTmp:
3819 case kIemNativeWhat_pVCpuFixed:
3820 case kIemNativeWhat_pCtxFixed:
3821 case kIemNativeWhat_FixedReserved:
3822 case kIemNativeWhat_Invalid:
3823 case kIemNativeWhat_End:
3824 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3825 }
3826 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3827 }
3828 }
3829
3830 /*
3831 * Do the actual freeing.
3832 */
3833 if (pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3834 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n", pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3835 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3836
3837 /* If there are guest register shadows in any call-volatile register, we
3838 have to clear the corrsponding guest register masks for each register. */
3839 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3840 if (fHstRegsWithGstShadow)
3841 {
3842 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3843 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
3844 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3845 do
3846 {
3847 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3848 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3849
3850 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
3851 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3852 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3853 } while (fHstRegsWithGstShadow != 0);
3854 }
3855
3856 return off;
3857}
3858
3859
3860/**
3861 * Flushes a set of guest register shadow copies.
3862 *
3863 * This is usually done after calling a threaded function or a C-implementation
3864 * of an instruction.
3865 *
3866 * @param pReNative The native recompile state.
3867 * @param fGstRegs Set of guest registers to flush.
3868 */
3869DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3870{
3871 /*
3872 * Reduce the mask by what's currently shadowed
3873 */
3874 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
3875 fGstRegs &= bmGstRegShadowsOld;
3876 if (fGstRegs)
3877 {
3878 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
3879 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
3880 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
3881 if (bmGstRegShadowsNew)
3882 {
3883 /*
3884 * Partial.
3885 */
3886 do
3887 {
3888 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3889 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3890 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3891 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3892 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3893
3894 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3895 fGstRegs &= ~fInThisHstReg;
3896 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
3897 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3898 if (!fGstRegShadowsNew)
3899 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3900 } while (fGstRegs != 0);
3901 }
3902 else
3903 {
3904 /*
3905 * Clear all.
3906 */
3907 do
3908 {
3909 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3910 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3911 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3912 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3913 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3914
3915 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3916 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3917 } while (fGstRegs != 0);
3918 pReNative->Core.bmHstRegsWithGstShadow = 0;
3919 }
3920 }
3921}
3922
3923
3924/**
3925 * Flushes delayed write of a specific guest register.
3926 *
3927 * This must be called prior to calling CImpl functions and any helpers that use
3928 * the guest state (like raising exceptions) and such.
3929 *
3930 * This optimization has not yet been implemented. The first target would be
3931 * RIP updates, since these are the most common ones.
3932 */
3933DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3934 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
3935{
3936 RT_NOREF(pReNative, enmClass, idxReg);
3937 return off;
3938}
3939
3940
3941/**
3942 * Flushes any delayed guest register writes.
3943 *
3944 * This must be called prior to calling CImpl functions and any helpers that use
3945 * the guest state (like raising exceptions) and such.
3946 *
3947 * This optimization has not yet been implemented. The first target would be
3948 * RIP updates, since these are the most common ones.
3949 */
3950DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3951{
3952 RT_NOREF(pReNative, off);
3953 return off;
3954}
3955
3956
3957#ifdef VBOX_STRICT
3958/**
3959 * Does internal register allocator sanity checks.
3960 */
3961static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
3962{
3963 /*
3964 * Iterate host registers building a guest shadowing set.
3965 */
3966 uint64_t bmGstRegShadows = 0;
3967 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
3968 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
3969 while (bmHstRegsWithGstShadow)
3970 {
3971 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
3972 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3973 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3974
3975 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3976 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
3977 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
3978 bmGstRegShadows |= fThisGstRegShadows;
3979 while (fThisGstRegShadows)
3980 {
3981 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
3982 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
3983 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
3984 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
3985 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
3986 }
3987 }
3988 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
3989 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
3990 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
3991
3992 /*
3993 * Now the other way around, checking the guest to host index array.
3994 */
3995 bmHstRegsWithGstShadow = 0;
3996 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
3997 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3998 while (bmGstRegShadows)
3999 {
4000 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4001 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4002 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4003
4004 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4005 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4006 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4007 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4008 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4009 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4010 }
4011 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4012 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4013 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4014}
4015#endif
4016
4017
4018/*********************************************************************************************************************************
4019* Code Emitters (larger snippets) *
4020*********************************************************************************************************************************/
4021
4022/**
4023 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4024 * extending to 64-bit width.
4025 *
4026 * @returns New code buffer offset on success, UINT32_MAX on failure.
4027 * @param pReNative .
4028 * @param off The current code buffer position.
4029 * @param idxHstReg The host register to load the guest register value into.
4030 * @param enmGstReg The guest register to load.
4031 *
4032 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4033 * that is something the caller needs to do if applicable.
4034 */
4035DECL_HIDDEN_THROW(uint32_t)
4036iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4037{
4038 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4039 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4040
4041 switch (g_aGstShadowInfo[enmGstReg].cb)
4042 {
4043 case sizeof(uint64_t):
4044 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4045 case sizeof(uint32_t):
4046 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4047 case sizeof(uint16_t):
4048 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4049#if 0 /* not present in the table. */
4050 case sizeof(uint8_t):
4051 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4052#endif
4053 default:
4054 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4055 }
4056}
4057
4058
4059#ifdef VBOX_STRICT
4060/**
4061 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
4062 *
4063 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4064 * Trashes EFLAGS on AMD64.
4065 */
4066static uint32_t
4067iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4068{
4069# ifdef RT_ARCH_AMD64
4070 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4071
4072 /* rol reg64, 32 */
4073 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4074 pbCodeBuf[off++] = 0xc1;
4075 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4076 pbCodeBuf[off++] = 32;
4077
4078 /* test reg32, ffffffffh */
4079 if (idxReg >= 8)
4080 pbCodeBuf[off++] = X86_OP_REX_B;
4081 pbCodeBuf[off++] = 0xf7;
4082 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4083 pbCodeBuf[off++] = 0xff;
4084 pbCodeBuf[off++] = 0xff;
4085 pbCodeBuf[off++] = 0xff;
4086 pbCodeBuf[off++] = 0xff;
4087
4088 /* je/jz +1 */
4089 pbCodeBuf[off++] = 0x74;
4090 pbCodeBuf[off++] = 0x01;
4091
4092 /* int3 */
4093 pbCodeBuf[off++] = 0xcc;
4094
4095 /* rol reg64, 32 */
4096 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4097 pbCodeBuf[off++] = 0xc1;
4098 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4099 pbCodeBuf[off++] = 32;
4100
4101# elif defined(RT_ARCH_ARM64)
4102 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4103 /* lsr tmp0, reg64, #32 */
4104 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4105 /* cbz tmp0, +1 */
4106 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4107 /* brk #0x1100 */
4108 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4109
4110# else
4111# error "Port me!"
4112# endif
4113 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4114 return off;
4115}
4116#endif /* VBOX_STRICT */
4117
4118
4119#ifdef VBOX_STRICT
4120/**
4121 * Emitting code that checks that the content of register @a idxReg is the same
4122 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
4123 * instruction if that's not the case.
4124 *
4125 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4126 * Trashes EFLAGS on AMD64.
4127 */
4128static uint32_t
4129iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4130{
4131# ifdef RT_ARCH_AMD64
4132 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4133
4134 /* cmp reg, [mem] */
4135 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4136 {
4137 if (idxReg >= 8)
4138 pbCodeBuf[off++] = X86_OP_REX_R;
4139 pbCodeBuf[off++] = 0x38;
4140 }
4141 else
4142 {
4143 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4144 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4145 else
4146 {
4147 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4148 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4149 else
4150 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4151 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4152 if (idxReg >= 8)
4153 pbCodeBuf[off++] = X86_OP_REX_R;
4154 }
4155 pbCodeBuf[off++] = 0x39;
4156 }
4157 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4158
4159 /* je/jz +1 */
4160 pbCodeBuf[off++] = 0x74;
4161 pbCodeBuf[off++] = 0x01;
4162
4163 /* int3 */
4164 pbCodeBuf[off++] = 0xcc;
4165
4166 /* For values smaller than the register size, we must check that the rest
4167 of the register is all zeros. */
4168 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4169 {
4170 /* test reg64, imm32 */
4171 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4172 pbCodeBuf[off++] = 0xf7;
4173 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4174 pbCodeBuf[off++] = 0;
4175 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4176 pbCodeBuf[off++] = 0xff;
4177 pbCodeBuf[off++] = 0xff;
4178
4179 /* je/jz +1 */
4180 pbCodeBuf[off++] = 0x74;
4181 pbCodeBuf[off++] = 0x01;
4182
4183 /* int3 */
4184 pbCodeBuf[off++] = 0xcc;
4185 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4186 }
4187 else
4188 {
4189 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4190 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4191 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4192 }
4193
4194# elif defined(RT_ARCH_ARM64)
4195 /* mov TMP0, [gstreg] */
4196 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4197
4198 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4199 /* sub tmp0, tmp0, idxReg */
4200 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4201 /* cbz tmp0, +1 */
4202 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4203 /* brk #0x1000+enmGstReg */
4204 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4205 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4206
4207# else
4208# error "Port me!"
4209# endif
4210 return off;
4211}
4212#endif /* VBOX_STRICT */
4213
4214
4215#ifdef VBOX_STRICT
4216/**
4217 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
4218 * important bits.
4219 *
4220 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4221 * Trashes EFLAGS on AMD64.
4222 */
4223static uint32_t
4224iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4225{
4226 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4227 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4228 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4229 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4230
4231#ifdef RT_ARCH_AMD64
4232 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4233
4234 /* je/jz +1 */
4235 pbCodeBuf[off++] = 0x74;
4236 pbCodeBuf[off++] = 0x01;
4237
4238 /* int3 */
4239 pbCodeBuf[off++] = 0xcc;
4240
4241# elif defined(RT_ARCH_ARM64)
4242 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4243
4244 /* b.eq +1 */
4245 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4246 /* brk #0x2000 */
4247 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4248
4249# else
4250# error "Port me!"
4251# endif
4252 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4253
4254 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4255 return off;
4256}
4257#endif /* VBOX_STRICT */
4258
4259
4260/**
4261 * Emits a code for checking the return code of a call and rcPassUp, returning
4262 * from the code if either are non-zero.
4263 */
4264DECL_HIDDEN_THROW(uint32_t)
4265iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
4266{
4267#ifdef RT_ARCH_AMD64
4268 /*
4269 * AMD64: eax = call status code.
4270 */
4271
4272 /* edx = rcPassUp */
4273 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4274# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4275 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
4276# endif
4277
4278 /* edx = eax | rcPassUp */
4279 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4280 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
4281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
4282 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4283
4284 /* Jump to non-zero status return path. */
4285 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
4286
4287 /* done. */
4288
4289#elif RT_ARCH_ARM64
4290 /*
4291 * ARM64: w0 = call status code.
4292 */
4293# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4294 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
4295# endif
4296 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4297
4298 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4299
4300 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
4301
4302 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4303 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
4304 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
4305
4306#else
4307# error "port me"
4308#endif
4309 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4310 return off;
4311}
4312
4313
4314/**
4315 * Emits code to check if the content of @a idxAddrReg is a canonical address,
4316 * raising a \#GP(0) if it isn't.
4317 *
4318 * @returns New code buffer offset, UINT32_MAX on failure.
4319 * @param pReNative The native recompile state.
4320 * @param off The code buffer offset.
4321 * @param idxAddrReg The host register with the address to check.
4322 * @param idxInstr The current instruction.
4323 */
4324DECL_HIDDEN_THROW(uint32_t)
4325iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
4326{
4327 RT_NOREF(idxInstr);
4328
4329 /*
4330 * Make sure we don't have any outstanding guest register writes as we may
4331 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
4332 */
4333 off = iemNativeRegFlushPendingWrites(pReNative, off);
4334
4335#ifdef RT_ARCH_AMD64
4336 /*
4337 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
4338 * return raisexcpt();
4339 * ---- this wariant avoid loading a 64-bit immediate, but is an instruction longer.
4340 */
4341 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4342
4343 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
4344 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
4345 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
4346 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
4347
4348# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4349 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4350# else
4351 uint32_t const offFixup = off;
4352 off = iemNativeEmitJzToFixed(pReNative, off, 0);
4353 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
4354 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4355 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
4356# endif
4357
4358 iemNativeRegFreeTmp(pReNative, iTmpReg);
4359
4360#elif defined(RT_ARCH_ARM64)
4361 /*
4362 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
4363 * return raisexcpt();
4364 * ----
4365 * mov x1, 0x800000000000
4366 * add x1, x0, x1
4367 * cmp xzr, x1, lsr 48
4368 * and either:
4369 * b.ne .Lraisexcpt
4370 * or:
4371 * b.eq .Lnoexcept
4372 * movz x1, #instruction-number
4373 * b .Lraisexcpt
4374 * .Lnoexcept:
4375 */
4376 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4377
4378 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
4379 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
4380 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
4381
4382# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4383 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4384# else
4385 uint32_t const offFixup = off;
4386 off = iemNativeEmitJzToFixed(pReNative, off, 0);
4387 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
4388 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4389 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
4390# endif
4391
4392 iemNativeRegFreeTmp(pReNative, iTmpReg);
4393
4394#else
4395# error "Port me"
4396#endif
4397 return off;
4398}
4399
4400
4401/**
4402 * Emits code to check if the content of @a idxAddrReg is within the limit of
4403 * idxSegReg, raising a \#GP(0) if it isn't.
4404 *
4405 * @returns New code buffer offset; throws VBox status code on error.
4406 * @param pReNative The native recompile state.
4407 * @param off The code buffer offset.
4408 * @param idxAddrReg The host register (32-bit) with the address to
4409 * check.
4410 * @param idxSegReg The segment register (X86_SREG_XXX) to check
4411 * against.
4412 * @param idxInstr The current instruction.
4413 */
4414DECL_HIDDEN_THROW(uint32_t)
4415iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4416 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
4417{
4418 /*
4419 * Make sure we don't have any outstanding guest register writes as we may
4420 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
4421 */
4422 off = iemNativeRegFlushPendingWrites(pReNative, off);
4423
4424 /** @todo implement expand down/whatnot checking */
4425 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
4426
4427 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4428 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
4429 kIemNativeGstRegUse_ForUpdate);
4430
4431 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
4432
4433#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4434 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4435 RT_NOREF(idxInstr);
4436#else
4437 uint32_t const offFixup = off;
4438 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
4439 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
4440 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4441 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
4442#endif
4443
4444 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
4445 return off;
4446}
4447
4448
4449/**
4450 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
4451 *
4452 * @returns The flush mask.
4453 * @param fCImpl The IEM_CIMPL_F_XXX flags.
4454 * @param fGstShwFlush The starting flush mask.
4455 */
4456DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
4457{
4458 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
4459 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
4460 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
4461 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
4462 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
4463 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
4464 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
4465 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
4466 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
4467 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
4468 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
4469 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
4470 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4471 return fGstShwFlush;
4472}
4473
4474
4475/**
4476 * Emits a call to a CImpl function or something similar.
4477 */
4478static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
4479 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
4480 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
4481{
4482 /*
4483 * Flush stuff. PC and EFlags are implictly flushed, the latter because we
4484 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
4485 */
4486 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
4487 fGstShwFlush
4488 | RT_BIT_64(kIemNativeGstReg_Pc)
4489 | RT_BIT_64(kIemNativeGstReg_EFlags));
4490 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4491
4492 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4493
4494 /*
4495 * Load the parameters.
4496 */
4497#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
4498 /* Special code the hidden VBOXSTRICTRC pointer. */
4499 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4500 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4501 if (cAddParams > 0)
4502 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
4503 if (cAddParams > 1)
4504 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
4505 if (cAddParams > 2)
4506 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
4507 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4508
4509#else
4510 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4511 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4512 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4513 if (cAddParams > 0)
4514 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
4515 if (cAddParams > 1)
4516 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
4517 if (cAddParams > 2)
4518# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
4519 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
4520# else
4521 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
4522# endif
4523#endif
4524
4525 /*
4526 * Make the call.
4527 */
4528 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
4529
4530#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4531 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4532#endif
4533
4534 /*
4535 * Check the status code.
4536 */
4537 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4538}
4539
4540
4541/**
4542 * Emits a call to a threaded worker function.
4543 */
4544static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
4545{
4546 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
4547 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4548
4549#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4550 /* The threaded function may throw / long jmp, so set current instruction
4551 number if we're counting. */
4552 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4553#endif
4554
4555 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
4556
4557#ifdef RT_ARCH_AMD64
4558 /* Load the parameters and emit the call. */
4559# ifdef RT_OS_WINDOWS
4560# ifndef VBOXSTRICTRC_STRICT_ENABLED
4561 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4562 if (cParams > 0)
4563 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
4564 if (cParams > 1)
4565 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
4566 if (cParams > 2)
4567 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
4568# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
4569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
4570 if (cParams > 0)
4571 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
4572 if (cParams > 1)
4573 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
4574 if (cParams > 2)
4575 {
4576 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
4577 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
4578 }
4579 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4580# endif /* VBOXSTRICTRC_STRICT_ENABLED */
4581# else
4582 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4583 if (cParams > 0)
4584 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
4585 if (cParams > 1)
4586 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
4587 if (cParams > 2)
4588 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
4589# endif
4590
4591 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4592
4593# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4594 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4595# endif
4596
4597#elif RT_ARCH_ARM64
4598 /*
4599 * ARM64:
4600 */
4601 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4602 if (cParams > 0)
4603 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
4604 if (cParams > 1)
4605 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
4606 if (cParams > 2)
4607 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
4608
4609 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4610
4611#else
4612# error "port me"
4613#endif
4614
4615 /*
4616 * Check the status code.
4617 */
4618 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
4619
4620 return off;
4621}
4622
4623
4624/**
4625 * Emits the code at the RaiseGP0 label.
4626 */
4627static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4628{
4629 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
4630 if (idxLabel != UINT32_MAX)
4631 {
4632 iemNativeLabelDefine(pReNative, idxLabel, off);
4633
4634 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
4635 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4636#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4637 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
4638#endif
4639 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
4640
4641 /* jump back to the return sequence. */
4642 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4643 }
4644 return off;
4645}
4646
4647
4648/**
4649 * Emits the code at the ReturnWithFlags label (returns
4650 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
4651 */
4652static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4653{
4654 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
4655 if (idxLabel != UINT32_MAX)
4656 {
4657 iemNativeLabelDefine(pReNative, idxLabel, off);
4658
4659 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
4660
4661 /* jump back to the return sequence. */
4662 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4663 }
4664 return off;
4665}
4666
4667
4668/**
4669 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
4670 */
4671static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4672{
4673 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
4674 if (idxLabel != UINT32_MAX)
4675 {
4676 iemNativeLabelDefine(pReNative, idxLabel, off);
4677
4678 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
4679
4680 /* jump back to the return sequence. */
4681 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4682 }
4683 return off;
4684}
4685
4686
4687/**
4688 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
4689 */
4690static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4691{
4692 /*
4693 * Generate the rc + rcPassUp fiddling code if needed.
4694 */
4695 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4696 if (idxLabel != UINT32_MAX)
4697 {
4698 iemNativeLabelDefine(pReNative, idxLabel, off);
4699
4700 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
4701#ifdef RT_ARCH_AMD64
4702# ifdef RT_OS_WINDOWS
4703# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4704 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
4705# endif
4706 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4707 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
4708# else
4709 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4710 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
4711# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4712 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
4713# endif
4714# endif
4715# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4716 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
4717# endif
4718
4719#else
4720 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
4721 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4722 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
4723#endif
4724
4725 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
4726 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4727 }
4728 return off;
4729}
4730
4731
4732/**
4733 * Emits a standard epilog.
4734 */
4735static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
4736{
4737 *pidxReturnLabel = UINT32_MAX;
4738
4739 /*
4740 * Successful return, so clear the return register (eax, w0).
4741 */
4742 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
4743
4744 /*
4745 * Define label for common return point.
4746 */
4747 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
4748 *pidxReturnLabel = idxReturn;
4749
4750 /*
4751 * Restore registers and return.
4752 */
4753#ifdef RT_ARCH_AMD64
4754 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4755
4756 /* Reposition esp at the r15 restore point. */
4757 pbCodeBuf[off++] = X86_OP_REX_W;
4758 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
4759 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
4760 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
4761
4762 /* Pop non-volatile registers and return */
4763 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
4764 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
4765 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
4766 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
4767 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
4768 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
4769 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
4770 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
4771# ifdef RT_OS_WINDOWS
4772 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
4773 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
4774# endif
4775 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
4776 pbCodeBuf[off++] = 0xc9; /* leave */
4777 pbCodeBuf[off++] = 0xc3; /* ret */
4778 pbCodeBuf[off++] = 0xcc; /* int3 poison */
4779
4780#elif RT_ARCH_ARM64
4781 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4782
4783 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
4784 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
4785 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4786 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4787 IEMNATIVE_FRAME_VAR_SIZE / 8);
4788 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
4789 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4790 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4791 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4792 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4793 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4794 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4795 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4796 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4797 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4798 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4799 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4800
4801 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
4802 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
4803 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
4804 IEMNATIVE_FRAME_SAVE_REG_SIZE);
4805
4806 /* retab / ret */
4807# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
4808 if (1)
4809 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
4810 else
4811# endif
4812 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
4813
4814#else
4815# error "port me"
4816#endif
4817 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4818
4819 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
4820}
4821
4822
4823/**
4824 * Emits a standard prolog.
4825 */
4826static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4827{
4828#ifdef RT_ARCH_AMD64
4829 /*
4830 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
4831 * reserving 64 bytes for stack variables plus 4 non-register argument
4832 * slots. Fixed register assignment: xBX = pReNative;
4833 *
4834 * Since we always do the same register spilling, we can use the same
4835 * unwind description for all the code.
4836 */
4837 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4838 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
4839 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
4840 pbCodeBuf[off++] = 0x8b;
4841 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
4842 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
4843 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
4844# ifdef RT_OS_WINDOWS
4845 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
4846 pbCodeBuf[off++] = 0x8b;
4847 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
4848 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
4849 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
4850# else
4851 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
4852 pbCodeBuf[off++] = 0x8b;
4853 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
4854# endif
4855 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
4856 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
4857 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
4858 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
4859 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
4860 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
4861 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
4862 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
4863
4864 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
4865 X86_GREG_xSP,
4866 IEMNATIVE_FRAME_ALIGN_SIZE
4867 + IEMNATIVE_FRAME_VAR_SIZE
4868 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
4869 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
4870 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
4871 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
4872 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
4873
4874#elif RT_ARCH_ARM64
4875 /*
4876 * We set up a stack frame exactly like on x86, only we have to push the
4877 * return address our selves here. We save all non-volatile registers.
4878 */
4879 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4880
4881# ifdef RT_OS_DARWIN /** @todo This seems to be requirement by libunwind for JIT FDEs. Investigate further as been unable
4882 * to figure out where the BRK following AUTHB*+XPACB* stuff comes from in libunwind. It's
4883 * definitely the dwarf stepping code, but till found it's very tedious to figure out whether it's
4884 * in any way conditional, so just emitting this instructions now and hoping for the best... */
4885 /* pacibsp */
4886 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
4887# endif
4888
4889 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
4890 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
4891 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4892 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4893 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
4894 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
4895 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4896 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4897 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4898 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4899 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4900 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4901 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4902 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4903 /* Save the BP and LR (ret address) registers at the top of the frame. */
4904 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4905 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4906 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4907 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
4908 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
4909 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
4910
4911 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
4912 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
4913
4914 /* mov r28, r0 */
4915 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
4916 /* mov r27, r1 */
4917 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
4918
4919#else
4920# error "port me"
4921#endif
4922 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4923 return off;
4924}
4925
4926
4927
4928
4929/*********************************************************************************************************************************
4930* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
4931*********************************************************************************************************************************/
4932
4933#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
4934 { \
4935 Assert(pReNative->Core.bmVars == 0); \
4936 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
4937 Assert(pReNative->Core.bmStack == 0); \
4938 pReNative->fMc = (a_fMcFlags); \
4939 pReNative->fCImpl = (a_fCImplFlags); \
4940 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
4941
4942/** We have to get to the end in recompilation mode, as otherwise we won't
4943 * generate code for all the IEM_MC_IF_XXX branches. */
4944#define IEM_MC_END() \
4945 iemNativeVarFreeAll(pReNative); \
4946 } return off
4947
4948
4949
4950/*********************************************************************************************************************************
4951* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
4952*********************************************************************************************************************************/
4953
4954#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
4955 pReNative->fMc = 0; \
4956 pReNative->fCImpl = (a_fFlags); \
4957 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
4958
4959
4960#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4961 pReNative->fMc = 0; \
4962 pReNative->fCImpl = (a_fFlags); \
4963 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
4964
4965DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4966 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4967 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
4968{
4969 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
4970}
4971
4972
4973#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4974 pReNative->fMc = 0; \
4975 pReNative->fCImpl = (a_fFlags); \
4976 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4977 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
4978
4979DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4980 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4981 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
4982{
4983 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
4984}
4985
4986
4987#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4988 pReNative->fMc = 0; \
4989 pReNative->fCImpl = (a_fFlags); \
4990 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4991 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
4992
4993DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4994 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4995 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
4996 uint64_t uArg2)
4997{
4998 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
4999}
5000
5001
5002
5003/*********************************************************************************************************************************
5004* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5005*********************************************************************************************************************************/
5006
5007/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5008 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5009DECL_INLINE_THROW(uint32_t)
5010iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5011{
5012 /*
5013 * If its not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5014 * return with special status code and make the execution loop deal with
5015 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5016 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5017 * could continue w/o interruption, it probably will drop into the
5018 * debugger, so not worth the effort of trying to services it here and we
5019 * just lump it in with the handling of the others.
5020 *
5021 * To simplify the code and the register state management even more (wrt
5022 * immediate in AND operation), we always update the flags and skip the
5023 * extra check associated conditional jump.
5024 */
5025 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5026 <= UINT32_MAX);
5027 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5028 kIemNativeGstRegUse_ForUpdate);
5029 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5030 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5031 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5032 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5033 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5034
5035 /* Free but don't flush the EFLAGS register. */
5036 iemNativeRegFreeTmp(pReNative, idxEflReg);
5037
5038 return off;
5039}
5040
5041
5042#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
5043 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5044
5045#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
5046 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
5047 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5048
5049/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5050DECL_INLINE_THROW(uint32_t)
5051iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5052{
5053 /* Allocate a temporary PC register. */
5054 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5055
5056 /* Perform the addition and store the result. */
5057 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5059
5060 /* Free but don't flush the PC register. */
5061 iemNativeRegFreeTmp(pReNative, idxPcReg);
5062
5063 return off;
5064}
5065
5066
5067#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
5068 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5069
5070#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
5071 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
5072 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5073
5074/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5075DECL_INLINE_THROW(uint32_t)
5076iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5077{
5078 /* Allocate a temporary PC register. */
5079 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5080
5081 /* Perform the addition and store the result. */
5082 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5083 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5084
5085 /* Free but don't flush the PC register. */
5086 iemNativeRegFreeTmp(pReNative, idxPcReg);
5087
5088 return off;
5089}
5090
5091
5092#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
5093 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5094
5095#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
5096 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
5097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5098
5099/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5100DECL_INLINE_THROW(uint32_t)
5101iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5102{
5103 /* Allocate a temporary PC register. */
5104 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5105
5106 /* Perform the addition and store the result. */
5107 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5108 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5109 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5110
5111 /* Free but don't flush the PC register. */
5112 iemNativeRegFreeTmp(pReNative, idxPcReg);
5113
5114 return off;
5115}
5116
5117
5118
5119/*********************************************************************************************************************************
5120* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5121*********************************************************************************************************************************/
5122
5123#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
5124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5125 (a_enmEffOpSize), pCallEntry->idxInstr)
5126
5127#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5128 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
5129 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5130
5131#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
5132 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5133 IEMMODE_16BIT, pCallEntry->idxInstr)
5134
5135#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
5136 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
5137 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5138
5139#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
5140 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5141 IEMMODE_64BIT, pCallEntry->idxInstr)
5142
5143#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
5144 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
5145 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5146
5147/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5148 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5149 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5150DECL_INLINE_THROW(uint32_t)
5151iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5152 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5153{
5154 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
5155
5156 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
5157 off = iemNativeRegFlushPendingWrites(pReNative, off);
5158
5159 /* Allocate a temporary PC register. */
5160 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5161
5162 /* Perform the addition. */
5163 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5164
5165 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5166 {
5167 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5168 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5169 }
5170 else
5171 {
5172 /* Just truncate the result to 16-bit IP. */
5173 Assert(enmEffOpSize == IEMMODE_16BIT);
5174 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5175 }
5176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5177
5178 /* Free but don't flush the PC register. */
5179 iemNativeRegFreeTmp(pReNative, idxPcReg);
5180
5181 return off;
5182}
5183
5184
5185#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
5186 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5187 (a_enmEffOpSize), pCallEntry->idxInstr)
5188
5189#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5190 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
5191 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5192
5193#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
5194 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5195 IEMMODE_16BIT, pCallEntry->idxInstr)
5196
5197#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
5198 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
5199 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5200
5201#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
5202 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5203 IEMMODE_32BIT, pCallEntry->idxInstr)
5204
5205#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
5206 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
5207 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5208
5209/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
5210 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
5211 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
5212DECL_INLINE_THROW(uint32_t)
5213iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5214 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5215{
5216 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
5217
5218 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
5219 off = iemNativeRegFlushPendingWrites(pReNative, off);
5220
5221 /* Allocate a temporary PC register. */
5222 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5223
5224 /* Perform the addition. */
5225 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5226
5227 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
5228 if (enmEffOpSize == IEMMODE_16BIT)
5229 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5230
5231 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
5232 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5233
5234 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5235
5236 /* Free but don't flush the PC register. */
5237 iemNativeRegFreeTmp(pReNative, idxPcReg);
5238
5239 return off;
5240}
5241
5242
5243#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
5244 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
5245
5246#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
5247 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
5248 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5249
5250#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
5251 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
5252
5253#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
5254 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
5255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5256
5257#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
5258 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
5259
5260#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
5261 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
5262 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5263
5264/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
5265DECL_INLINE_THROW(uint32_t)
5266iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5267 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
5268{
5269 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
5270 off = iemNativeRegFlushPendingWrites(pReNative, off);
5271
5272 /* Allocate a temporary PC register. */
5273 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5274
5275 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
5276 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5277 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5278 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5279 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5280
5281 /* Free but don't flush the PC register. */
5282 iemNativeRegFreeTmp(pReNative, idxPcReg);
5283
5284 return off;
5285}
5286
5287
5288
5289/*********************************************************************************************************************************
5290* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
5291*********************************************************************************************************************************/
5292
5293/**
5294 * Pushes an IEM_MC_IF_XXX onto the condition stack.
5295 *
5296 * @returns Pointer to the condition stack entry on success, NULL on failure
5297 * (too many nestings)
5298 */
5299DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
5300{
5301 uint32_t const idxStack = pReNative->cCondDepth;
5302 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
5303
5304 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
5305 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
5306
5307 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
5308 pEntry->fInElse = false;
5309 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
5310 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
5311
5312 return pEntry;
5313}
5314
5315
5316/**
5317 * Start of the if-block, snapshotting the register and variable state.
5318 */
5319DECL_INLINE_THROW(void)
5320iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
5321{
5322 Assert(offIfBlock != UINT32_MAX);
5323 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5324 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5325 Assert(!pEntry->fInElse);
5326
5327 /* Define the start of the IF block if request or for disassembly purposes. */
5328 if (idxLabelIf != UINT32_MAX)
5329 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
5330#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5331 else
5332 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
5333#else
5334 RT_NOREF(offIfBlock);
5335#endif
5336
5337 /* Copy the initial state so we can restore it in the 'else' block. */
5338 pEntry->InitialState = pReNative->Core;
5339}
5340
5341
5342#define IEM_MC_ELSE() } while (0); \
5343 off = iemNativeEmitElse(pReNative, off); \
5344 do {
5345
5346/** Emits code related to IEM_MC_ELSE. */
5347DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5348{
5349 /* Check sanity and get the conditional stack entry. */
5350 Assert(off != UINT32_MAX);
5351 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5352 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5353 Assert(!pEntry->fInElse);
5354
5355 /* Jump to the endif */
5356 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
5357
5358 /* Define the else label and enter the else part of the condition. */
5359 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5360 pEntry->fInElse = true;
5361
5362 /* Snapshot the core state so we can do a merge at the endif and restore
5363 the snapshot we took at the start of the if-block. */
5364 pEntry->IfFinalState = pReNative->Core;
5365 pReNative->Core = pEntry->InitialState;
5366
5367 return off;
5368}
5369
5370
5371#define IEM_MC_ENDIF() } while (0); \
5372 off = iemNativeEmitEndIf(pReNative, off)
5373
5374/** Emits code related to IEM_MC_ENDIF. */
5375DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5376{
5377 /* Check sanity and get the conditional stack entry. */
5378 Assert(off != UINT32_MAX);
5379 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5380 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5381
5382 /*
5383 * Now we have find common group with the core state at the end of the
5384 * if-final. Use the smallest common denominator and just drop anything
5385 * that isn't the same in both states.
5386 */
5387 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
5388 * which is why we're doing this at the end of the else-block.
5389 * But we'd need more info about future for that to be worth the effort. */
5390 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
5391 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
5392 {
5393 /* shadow guest stuff first. */
5394 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
5395 if (fGstRegs)
5396 {
5397 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
5398 do
5399 {
5400 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5401 fGstRegs &= ~RT_BIT_64(idxGstReg);
5402
5403 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5404 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
5405 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
5406 {
5407 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
5408 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
5409 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
5410 }
5411 } while (fGstRegs);
5412 }
5413 else
5414 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
5415
5416 /* Check variables next. For now we must require them to be identical
5417 or stuff we can recreate. */
5418 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
5419 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
5420 if (fVars)
5421 {
5422 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
5423 do
5424 {
5425 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
5426 fVars &= ~RT_BIT_32(idxVar);
5427
5428 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
5429 {
5430 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
5431 continue;
5432 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5433 {
5434 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5435 if (idxHstReg != UINT8_MAX)
5436 {
5437 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5438 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5439 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
5440 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5441 }
5442 continue;
5443 }
5444 }
5445 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
5446 continue;
5447
5448 /* Irreconcilable, so drop it. */
5449 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5450 if (idxHstReg != UINT8_MAX)
5451 {
5452 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5453 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5454 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
5455 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5456 }
5457 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
5458 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5459 } while (fVars);
5460 }
5461
5462 /* Finally, check that the host register allocations matches. */
5463 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
5464 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
5465 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
5466 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
5467 }
5468
5469 /*
5470 * Define the endif label and maybe the else one if we're still in the 'if' part.
5471 */
5472 if (!pEntry->fInElse)
5473 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5474 else
5475 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
5476 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
5477
5478 /* Pop the conditional stack.*/
5479 pReNative->cCondDepth -= 1;
5480
5481 return off;
5482}
5483
5484
5485#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
5486 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
5487 do {
5488
5489/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
5490DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5491{
5492 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5493
5494 /* Get the eflags. */
5495 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5496 kIemNativeGstRegUse_ReadOnly);
5497
5498 /* Test and jump. */
5499 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5500
5501 /* Free but don't flush the EFlags register. */
5502 iemNativeRegFreeTmp(pReNative, idxEflReg);
5503
5504 /* Make a copy of the core state now as we start the if-block. */
5505 iemNativeCondStartIfBlock(pReNative, off);
5506
5507 return off;
5508}
5509
5510
5511#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
5512 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
5513 do {
5514
5515/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
5516DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5517{
5518 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5519
5520 /* Get the eflags. */
5521 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5522 kIemNativeGstRegUse_ReadOnly);
5523
5524 /* Test and jump. */
5525 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5526
5527 /* Free but don't flush the EFlags register. */
5528 iemNativeRegFreeTmp(pReNative, idxEflReg);
5529
5530 /* Make a copy of the core state now as we start the if-block. */
5531 iemNativeCondStartIfBlock(pReNative, off);
5532
5533 return off;
5534}
5535
5536
5537#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
5538 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
5539 do {
5540
5541/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
5542DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5543{
5544 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5545
5546 /* Get the eflags. */
5547 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5548 kIemNativeGstRegUse_ReadOnly);
5549
5550 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5551 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5552
5553 /* Test and jump. */
5554 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5555
5556 /* Free but don't flush the EFlags register. */
5557 iemNativeRegFreeTmp(pReNative, idxEflReg);
5558
5559 /* Make a copy of the core state now as we start the if-block. */
5560 iemNativeCondStartIfBlock(pReNative, off);
5561
5562 return off;
5563}
5564
5565
5566#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
5567 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
5568 do {
5569
5570/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
5571DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5572{
5573 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5574
5575 /* Get the eflags. */
5576 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5577 kIemNativeGstRegUse_ReadOnly);
5578
5579 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5580 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5581
5582 /* Test and jump. */
5583 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5584
5585 /* Free but don't flush the EFlags register. */
5586 iemNativeRegFreeTmp(pReNative, idxEflReg);
5587
5588 /* Make a copy of the core state now as we start the if-block. */
5589 iemNativeCondStartIfBlock(pReNative, off);
5590
5591 return off;
5592}
5593
5594
5595#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
5596 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
5597 do {
5598
5599#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
5600 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
5601 do {
5602
5603/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
5604DECL_INLINE_THROW(uint32_t)
5605iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5606 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
5607{
5608 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5609
5610 /* Get the eflags. */
5611 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5612 kIemNativeGstRegUse_ReadOnly);
5613
5614 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
5615 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
5616
5617 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
5618 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
5619 Assert(iBitNo1 != iBitNo2);
5620
5621#ifdef RT_ARCH_AMD64
5622 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
5623
5624 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5625 if (iBitNo1 > iBitNo2)
5626 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
5627 else
5628 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
5629 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5630
5631#elif defined(RT_ARCH_ARM64)
5632 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5633 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5634
5635 /* and tmpreg, eflreg, #1<<iBitNo1 */
5636 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
5637
5638 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
5639 if (iBitNo1 > iBitNo2)
5640 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5641 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
5642 else
5643 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5644 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
5645
5646 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5647
5648#else
5649# error "Port me"
5650#endif
5651
5652 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
5653 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
5654 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
5655
5656 /* Free but don't flush the EFlags and tmp registers. */
5657 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5658 iemNativeRegFreeTmp(pReNative, idxEflReg);
5659
5660 /* Make a copy of the core state now as we start the if-block. */
5661 iemNativeCondStartIfBlock(pReNative, off);
5662
5663 return off;
5664}
5665
5666
5667#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
5668 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
5669 do {
5670
5671#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
5672 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
5673 do {
5674
5675/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
5676 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
5677DECL_INLINE_THROW(uint32_t)
5678iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
5679 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
5680{
5681 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5682
5683 /* We need an if-block label for the non-inverted variant. */
5684 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
5685 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
5686
5687 /* Get the eflags. */
5688 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5689 kIemNativeGstRegUse_ReadOnly);
5690
5691 /* Translate the flag masks to bit numbers. */
5692 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5693 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5694
5695 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
5696 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
5697 Assert(iBitNo1 != iBitNo);
5698
5699 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
5700 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
5701 Assert(iBitNo2 != iBitNo);
5702 Assert(iBitNo2 != iBitNo1);
5703
5704#ifdef RT_ARCH_AMD64
5705 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
5706#elif defined(RT_ARCH_ARM64)
5707 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5708#endif
5709
5710 /* Check for the lone bit first. */
5711 if (!fInverted)
5712 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5713 else
5714 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
5715
5716 /* Then extract and compare the other two bits. */
5717#ifdef RT_ARCH_AMD64
5718 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5719 if (iBitNo1 > iBitNo2)
5720 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
5721 else
5722 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
5723 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5724
5725#elif defined(RT_ARCH_ARM64)
5726 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5727
5728 /* and tmpreg, eflreg, #1<<iBitNo1 */
5729 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
5730
5731 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
5732 if (iBitNo1 > iBitNo2)
5733 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5734 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
5735 else
5736 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5737 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
5738
5739 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5740
5741#else
5742# error "Port me"
5743#endif
5744
5745 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
5746 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
5747 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
5748
5749 /* Free but don't flush the EFlags and tmp registers. */
5750 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5751 iemNativeRegFreeTmp(pReNative, idxEflReg);
5752
5753 /* Make a copy of the core state now as we start the if-block. */
5754 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
5755
5756 return off;
5757}
5758
5759
5760#define IEM_MC_IF_CX_IS_NZ() \
5761 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
5762 do {
5763
5764/** Emits code for IEM_MC_IF_CX_IS_NZ. */
5765DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5766{
5767 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5768
5769 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5770 kIemNativeGstRegUse_ReadOnly);
5771 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5772 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5773
5774 iemNativeCondStartIfBlock(pReNative, off);
5775 return off;
5776}
5777
5778
5779#define IEM_MC_IF_ECX_IS_NZ() \
5780 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
5781 do {
5782
5783#define IEM_MC_IF_RCX_IS_NZ() \
5784 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
5785 do {
5786
5787/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
5788DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
5789{
5790 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5791
5792 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5793 kIemNativeGstRegUse_ReadOnly);
5794 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5795 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5796
5797 iemNativeCondStartIfBlock(pReNative, off);
5798 return off;
5799}
5800
5801
5802#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5803 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
5804 do {
5805
5806#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5807 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
5808 do {
5809
5810/** Emits code for IEM_MC_IF_CX_IS_NZ. */
5811DECL_INLINE_THROW(uint32_t)
5812iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
5813{
5814 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5815
5816 /* We have to load both RCX and EFLAGS before we can start branching,
5817 otherwise we'll end up in the else-block with an inconsistent
5818 register allocator state.
5819 Doing EFLAGS first as it's more likely to be loaded, right? */
5820 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5821 kIemNativeGstRegUse_ReadOnly);
5822 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5823 kIemNativeGstRegUse_ReadOnly);
5824
5825 /** @todo we could reduce this to a single branch instruction by spending a
5826 * temporary register and some setnz stuff. Not sure if loops are
5827 * worth it. */
5828 /* Check CX. */
5829 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5830
5831 /* Check the EFlags bit. */
5832 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5833 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5834 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5835 !fCheckIfSet /*fJmpIfSet*/);
5836
5837 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5838 iemNativeRegFreeTmp(pReNative, idxEflReg);
5839
5840 iemNativeCondStartIfBlock(pReNative, off);
5841 return off;
5842}
5843
5844
5845#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5846 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
5847 do {
5848
5849#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5850 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
5851 do {
5852
5853#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5854 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
5855 do {
5856
5857#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5858 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
5859 do {
5860
5861/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
5862 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
5863 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
5864 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
5865DECL_INLINE_THROW(uint32_t)
5866iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5867 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
5868{
5869 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5870
5871 /* We have to load both RCX and EFLAGS before we can start branching,
5872 otherwise we'll end up in the else-block with an inconsistent
5873 register allocator state.
5874 Doing EFLAGS first as it's more likely to be loaded, right? */
5875 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5876 kIemNativeGstRegUse_ReadOnly);
5877 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5878 kIemNativeGstRegUse_ReadOnly);
5879
5880 /** @todo we could reduce this to a single branch instruction by spending a
5881 * temporary register and some setnz stuff. Not sure if loops are
5882 * worth it. */
5883 /* Check RCX/ECX. */
5884 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5885
5886 /* Check the EFlags bit. */
5887 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5888 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5889 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5890 !fCheckIfSet /*fJmpIfSet*/);
5891
5892 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5893 iemNativeRegFreeTmp(pReNative, idxEflReg);
5894
5895 iemNativeCondStartIfBlock(pReNative, off);
5896 return off;
5897}
5898
5899
5900
5901/*********************************************************************************************************************************
5902* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
5903*********************************************************************************************************************************/
5904/** Number of hidden arguments for CIMPL calls.
5905 * @note We're sufferning from the usual VBOXSTRICTRC fun on Windows. */
5906#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5907# define IEM_CIMPL_HIDDEN_ARGS 3
5908#else
5909# define IEM_CIMPL_HIDDEN_ARGS 2
5910#endif
5911
5912#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
5913 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
5914
5915#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
5916 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
5917
5918#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
5919 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
5920
5921#define IEM_MC_LOCAL(a_Type, a_Name) \
5922 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
5923
5924#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
5925 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
5926
5927
5928/**
5929 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
5930 */
5931DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
5932{
5933 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
5934 return IEM_CIMPL_HIDDEN_ARGS;
5935 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
5936 return 1;
5937 return 0;
5938}
5939
5940
5941/**
5942 * Internal work that allocates a variable with kind set to
5943 * kIemNativeVarKind_Invalid and no current stack allocation.
5944 *
5945 * The kind will either be set by the caller or later when the variable is first
5946 * assigned a value.
5947 */
5948static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5949{
5950 Assert(cbType > 0 && cbType <= 64);
5951 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
5952 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
5953 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
5954 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5955 pReNative->Core.aVars[idxVar].cbVar = cbType;
5956 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5957 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5958 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
5959 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
5960 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
5961 pReNative->Core.aVars[idxVar].u.uValue = 0;
5962 return idxVar;
5963}
5964
5965
5966/**
5967 * Internal work that allocates an argument variable w/o setting enmKind.
5968 */
5969static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5970{
5971 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
5972 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5973 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
5974
5975 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5976 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
5977 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
5978 return idxVar;
5979}
5980
5981
5982/**
5983 * Gets the stack slot for a stack variable, allocating one if necessary.
5984 *
5985 * Calling this function implies that the stack slot will contain a valid
5986 * variable value. The caller deals with any register currently assigned to the
5987 * variable, typically by spilling it into the stack slot.
5988 *
5989 * @returns The stack slot number.
5990 * @param pReNative The recompiler state.
5991 * @param idxVar The variable.
5992 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
5993 */
5994DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5995{
5996 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5997 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5998
5999 /* Already got a slot? */
6000 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6001 if (idxStackSlot != UINT8_MAX)
6002 {
6003 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6004 return idxStackSlot;
6005 }
6006
6007 /*
6008 * A single slot is easy to allocate.
6009 * Allocate them from the top end, closest to BP, to reduce the displacement.
6010 */
6011 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6012 {
6013 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6014 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6015 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6016 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6017 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6018 return (uint8_t)iSlot;
6019 }
6020
6021 /*
6022 * We need more than one stack slot.
6023 *
6024 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6025 */
6026 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6027 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6028 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6029 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
6030 uint32_t bmStack = ~pReNative->Core.bmStack;
6031 while (bmStack != UINT32_MAX)
6032 {
6033/** @todo allocate from the top to reduce BP displacement. */
6034 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6035 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6036 if (!(iSlot & fBitAlignMask))
6037 {
6038 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6039 {
6040 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6041 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6042 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
6043 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
6044 return (uint8_t)iSlot;
6045 }
6046 }
6047 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6048 }
6049 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6050}
6051
6052
6053/**
6054 * Changes the variable to a stack variable.
6055 *
6056 * Currently this is s only possible to do the first time the variable is used,
6057 * switching later is can be implemented but not done.
6058 *
6059 * @param pReNative The recompiler state.
6060 * @param idxVar The variable.
6061 * @throws VERR_IEM_VAR_IPE_2
6062 */
6063static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6064{
6065 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6066 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6067 {
6068 /* We could in theory transition from immediate to stack as well, but it
6069 would involve the caller doing work storing the value on the stack. So,
6070 till that's required we only allow transition from invalid. */
6071 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6072 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6073 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6074 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
6075
6076 /* Note! We don't allocate a stack slot here, that's only done when a
6077 slot is actually needed to hold a variable value. */
6078 }
6079}
6080
6081
6082/**
6083 * Sets it to a variable with a constant value.
6084 *
6085 * This does not require stack storage as we know the value and can always
6086 * reload it, unless of course it's referenced.
6087 *
6088 * @param pReNative The recompiler state.
6089 * @param idxVar The variable.
6090 * @param uValue The immediate value.
6091 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6092 */
6093static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6094{
6095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6096 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
6097 {
6098 /* Only simple transitions for now. */
6099 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6100 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6101 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
6102 }
6103 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6104
6105 pReNative->Core.aVars[idxVar].u.uValue = uValue;
6106}
6107
6108
6109/**
6110 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6111 *
6112 * This does not require stack storage as we know the value and can always
6113 * reload it. Loading is postponed till needed.
6114 *
6115 * @param pReNative The recompiler state.
6116 * @param idxVar The variable.
6117 * @param idxOtherVar The variable to take the (stack) address of.
6118 *
6119 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6120 */
6121static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6122{
6123 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6124 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6125
6126 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6127 {
6128 /* Only simple transitions for now. */
6129 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6130 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6131 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6132 }
6133 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6134
6135 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
6136
6137 /* Update the other variable, ensure it's a stack variable. */
6138 /** @todo handle variables with const values... that'll go boom now. */
6139 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6140 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
6141}
6142
6143
6144/**
6145 * Sets the variable to a reference (pointer) to a guest register reference.
6146 *
6147 * This does not require stack storage as we know the value and can always
6148 * reload it. Loading is postponed till needed.
6149 *
6150 * @param pReNative The recompiler state.
6151 * @param idxVar The variable.
6152 * @param enmRegClass The class guest registers to reference.
6153 * @param idxReg The register within @a enmRegClass to reference.
6154 *
6155 * @throws VERR_IEM_VAR_IPE_2
6156 */
6157static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6158 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6159{
6160 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6161
6162 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
6163 {
6164 /* Only simple transitions for now. */
6165 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6166 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6167 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
6168 }
6169 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6170
6171 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
6172 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
6173}
6174
6175
6176DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6177{
6178 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6179}
6180
6181
6182DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6183{
6184 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6185 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6186 return idxVar;
6187}
6188
6189
6190DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6191{
6192 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6193 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6194 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6195 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6196
6197 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6198 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
6199 return idxArgVar;
6200}
6201
6202
6203DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6204{
6205 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6206 /* Don't set to stack now, leave that to the first use as for instance
6207 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6208 return idxVar;
6209}
6210
6211
6212DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6213{
6214 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6215 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6216 return idxVar;
6217}
6218
6219
6220/**
6221 * Makes sure variable @a idxVar has a register assigned to it.
6222 *
6223 * @returns The host register number.
6224 * @param pReNative The recompiler state.
6225 * @param idxVar The variable.
6226 * @param poff Pointer to the instruction buffer offset.
6227 * In case a register needs to be freed up or the value
6228 * loaded off the stack.
6229 * @param fInitialized Set if the variable must already have been initialized.
6230 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6231 * the case.
6232 */
6233DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6234 uint32_t *poff, bool fInitialized = false)
6235{
6236 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6237 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
6238/** @todo we must mark the variable as active and add a release function to
6239 * mark it as inactive, otherwise temporary register allocations may
6240 * cause the variable to be spilled onto the stack. */
6241
6242 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6243 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6244 {
6245 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
6246 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6247 return idxReg;
6248 }
6249
6250 /*
6251 * If the kind of variable has not yet been set, default to 'stack'.
6252 */
6253 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
6254 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6255 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
6256 iemNativeVarSetKindToStack(pReNative, idxVar);
6257
6258 /*
6259 * We have to allocate a register for the variable, even if its a stack one
6260 * as we don't know if there are modification being made to it before its
6261 * finalized (todo: analyze and insert hints about that?).
6262 *
6263 * If we can, we try get the correct register for argument variables. This
6264 * is assuming that most argument variables are fetched as close as possible
6265 * to the actual call, so that there aren't any interfering hidden calls
6266 * (memory accesses, etc) inbetween.
6267 *
6268 * If we cannot or it's a variable, we make sure no argument registers
6269 * that will be used by this MC block will be allocated here, and we always
6270 * prefer non-volatile registers to avoid needing to spill stuff for internal
6271 * call.
6272 */
6273 /** @todo Detect too early argument value fetches and warn about hidden
6274 * calls causing less optimal code to be generated in the python script. */
6275
6276 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6277 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6278 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6279 {
6280 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6281 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6282 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6283 }
6284 else
6285 {
6286 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6287 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6288 & ~pReNative->Core.bmHstRegsWithGstShadow
6289 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6290 & fNotArgsMask;
6291 if (fRegs)
6292 {
6293 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6294 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6295 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6296 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6297 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6298 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6299 }
6300 else
6301 {
6302 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6303 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6304 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6305 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6306 }
6307 }
6308 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6309 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6310
6311 /*
6312 * Load it off the stack if we've got a stack slot.
6313 */
6314 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6315 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6316 {
6317 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6318 switch (pReNative->Core.aVars[idxVar].cbVar)
6319 {
6320 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6321 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6322 case 3: AssertFailed(); RT_FALL_THRU();
6323 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6324 default: AssertFailed(); RT_FALL_THRU();
6325 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6326 }
6327 }
6328 else
6329 {
6330 Assert(idxStackSlot == UINT8_MAX);
6331 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6332 }
6333 return idxReg;
6334}
6335
6336
6337/**
6338 * The value of variable @a idxVar will be written in full to the @a enmGstReg
6339 * guest register.
6340 *
6341 * This function makes sure there is a register for it and sets it to be the
6342 * current shadow copy of @a enmGstReg.
6343 *
6344 * @returns The host register number.
6345 * @param pReNative The recompiler state.
6346 * @param idxVar The variable.
6347 * @param enmGstReg The guest register this variable will be written to
6348 * after this call.
6349 * @param poff Pointer to the instruction buffer offset.
6350 * In case a register needs to be freed up or if the
6351 * variable content needs to be loaded off the stack.
6352 *
6353 * @note We DO NOT expect @a idxVar to be an argument variable,
6354 * because we can only in the commit stage of an instruction when this
6355 * function is used.
6356 */
6357DECL_HIDDEN_THROW(uint8_t)
6358iemNativeVarAllocRegisterForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
6359{
6360 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6361 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
6362 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
6363 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
6364 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
6365 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
6366 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6367
6368 /*
6369 * This shouldn't ever be used for arguments, unless it's in a weird else
6370 * branch that doesn't do any calling and even then it's questionable.
6371 *
6372 * However, in case someone writes crazy wrong MC code and does register
6373 * updates before making calls, just use the regular register allocator to
6374 * ensure we get a register suitable for the intended argument number.
6375 */
6376 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarAllocRegister(pReNative, idxVar, poff));
6377
6378 /*
6379 * If there is already a register for the variable, we transfer/set the
6380 * guest shadow copy assignment to it.
6381 */
6382 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6383 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6384 {
6385 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
6386 {
6387 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
6388 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
6389 Log12(("iemNativeVarAllocRegisterForGuestReg: Moved %s for guest %s into %s for full write\n",
6390 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
6391 }
6392 else
6393 {
6394 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
6395 Log12(("iemNativeVarAllocRegisterForGuestReg: Marking %s as copy of guest %s (full write)\n",
6396 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
6397 }
6398 /** @todo figure this one out. We need some way of making sure the register isn't
6399 * modified after this point, just in case we start writing crappy MC code. */
6400 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
6401 return idxReg;
6402 }
6403 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6404
6405 /*
6406 * Because this is supposed to be the commit stage, we're just tag along with the
6407 * temporary register allocator and upgrade it to a variable register.
6408 */
6409 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
6410 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
6411 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
6412 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
6413 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
6414 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6415
6416 /*
6417 * Now we need to load the register value.
6418 */
6419 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
6420 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
6421 else
6422 {
6423 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6424 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6425 switch (pReNative->Core.aVars[idxVar].cbVar)
6426 {
6427 case sizeof(uint64_t):
6428 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
6429 break;
6430 case sizeof(uint32_t):
6431 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
6432 break;
6433 case sizeof(uint16_t):
6434 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
6435 break;
6436 case sizeof(uint8_t):
6437 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
6438 break;
6439 default:
6440 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6441 }
6442 }
6443
6444 return idxReg;
6445}
6446
6447
6448/**
6449 * Sets the host register for @a idxVarRc to @a idxReg.
6450 *
6451 * The register must not be allocated. Any guest register shadowing will be
6452 * implictly dropped by this call.
6453 *
6454 * The variable must not have any register associated with it (causes
6455 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
6456 * implied.
6457 *
6458 * @returns idxReg
6459 * @param pReNative The recompiler state.
6460 * @param idxVar The variable.
6461 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
6462 * @param off For recording in debug info.
6463 *
6464 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
6465 */
6466DECL_INLINE_THROW(uint8_t) iemNativeVarSetRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
6467{
6468 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6469 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6470 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
6471 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
6472
6473 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
6474 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6475
6476 iemNativeVarSetKindToStack(pReNative, idxVar);
6477 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6478
6479 return idxReg;
6480}
6481
6482
6483/**
6484 * Worker that frees the stack slots for variable @a idxVar if any allocated.
6485 *
6486 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
6487 */
6488DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6489{
6490 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6491 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6492 {
6493 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
6494 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
6495 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
6496 Assert(cSlots > 0);
6497 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
6498 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
6499 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
6500 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6501 }
6502 else
6503 Assert(idxStackSlot == UINT8_MAX);
6504}
6505
6506
6507/**
6508 * Worker that frees a single variable.
6509 *
6510 * ASSUMES that @a idxVar is valid.
6511 */
6512DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6513{
6514 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
6515 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
6516
6517 /* Free the host register first if any assigned. */
6518 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6519 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6520 {
6521 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
6522 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
6523 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6524 }
6525
6526 /* Free argument mapping. */
6527 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6528 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
6529 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
6530
6531 /* Free the stack slots. */
6532 iemNativeVarFreeStackSlots(pReNative, idxVar);
6533
6534 /* Free the actual variable. */
6535 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6536 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6537}
6538
6539
6540/**
6541 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
6542 */
6543DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
6544{
6545 while (bmVars != 0)
6546 {
6547 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
6548 bmVars &= ~RT_BIT_32(idxVar);
6549
6550#if 1 /** @todo optimize by simplifying this later... */
6551 iemNativeVarFreeOneWorker(pReNative, idxVar);
6552#else
6553 /* Only need to free the host register, the rest is done as bulk updates below. */
6554 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6555 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6556 {
6557 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
6558 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
6559 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6560 }
6561#endif
6562 }
6563#if 0 /** @todo optimize by simplifying this later... */
6564 pReNative->Core.bmVars = 0;
6565 pReNative->Core.bmStack = 0;
6566 pReNative->Core.u64ArgVars = UINT64_MAX;
6567#endif
6568}
6569
6570
6571/**
6572 * This is called by IEM_MC_END() to clean up all variables.
6573 */
6574DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
6575{
6576 uint32_t const bmVars = pReNative->Core.bmVars;
6577 if (bmVars != 0)
6578 iemNativeVarFreeAllSlow(pReNative, bmVars);
6579 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
6580 Assert(pReNative->Core.bmStack == 0);
6581}
6582
6583
6584#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
6585
6586/**
6587 * This is called by IEM_MC_FREE_LOCAL.
6588 */
6589DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6590{
6591 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6592 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6593 iemNativeVarFreeOneWorker(pReNative, idxVar);
6594}
6595
6596
6597#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
6598
6599/**
6600 * This is called by IEM_MC_FREE_ARG.
6601 */
6602DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6603{
6604 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6605 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
6606 iemNativeVarFreeOneWorker(pReNative, idxVar);
6607}
6608
6609
6610#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
6611
6612/**
6613 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
6614 */
6615DECL_INLINE_THROW(uint32_t)
6616iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
6617{
6618 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
6619 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
6620 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6621 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
6622 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
6623
6624 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
6625 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
6626 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
6627 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6628
6629 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
6630
6631 /*
6632 * Special case for immediates.
6633 */
6634 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
6635 {
6636 switch (pReNative->Core.aVars[idxVarDst].cbVar)
6637 {
6638 case sizeof(uint16_t):
6639 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
6640 break;
6641 case sizeof(uint32_t):
6642 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
6643 break;
6644 default: AssertFailed(); break;
6645 }
6646 }
6647 else
6648 {
6649 /*
6650 * The generic solution for now.
6651 */
6652 /** @todo optimize this by having the python script make sure the source
6653 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
6654 * statement. Then we could just transfer the register assignments. */
6655 uint8_t const idxRegDst = iemNativeVarAllocRegister(pReNative, idxVarDst, &off);
6656 uint8_t const idxRegSrc = iemNativeVarAllocRegister(pReNative, idxVarSrc, &off);
6657 switch (pReNative->Core.aVars[idxVarDst].cbVar)
6658 {
6659 case sizeof(uint16_t):
6660 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
6661 break;
6662 case sizeof(uint32_t):
6663 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
6664 break;
6665 default: AssertFailed(); break;
6666 }
6667 }
6668 return off;
6669}
6670
6671
6672
6673/*********************************************************************************************************************************
6674* Emitters for IEM_MC_CALL_CIMPL_XXX *
6675*********************************************************************************************************************************/
6676
6677/**
6678 * Emits code to load a reference to the given guest register into @a idxGprDst.
6679 */
6680DECL_INLINE_THROW(uint32_t)
6681iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
6682 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
6683{
6684 /*
6685 * Get the offset relative to the CPUMCTX structure.
6686 */
6687 uint32_t offCpumCtx;
6688 switch (enmClass)
6689 {
6690 case kIemNativeGstRegRef_Gpr:
6691 Assert(idxRegInClass < 16);
6692 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
6693 break;
6694
6695 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
6696 Assert(idxRegInClass < 4);
6697 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
6698 break;
6699
6700 case kIemNativeGstRegRef_EFlags:
6701 Assert(idxRegInClass == 0);
6702 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
6703 break;
6704
6705 case kIemNativeGstRegRef_MxCsr:
6706 Assert(idxRegInClass == 0);
6707 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
6708 break;
6709
6710 case kIemNativeGstRegRef_FpuReg:
6711 Assert(idxRegInClass < 8);
6712 AssertFailed(); /** @todo what kind of indexing? */
6713 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
6714 break;
6715
6716 case kIemNativeGstRegRef_MReg:
6717 Assert(idxRegInClass < 8);
6718 AssertFailed(); /** @todo what kind of indexing? */
6719 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
6720 break;
6721
6722 case kIemNativeGstRegRef_XReg:
6723 Assert(idxRegInClass < 16);
6724 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
6725 break;
6726
6727 default:
6728 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
6729 }
6730
6731 /*
6732 * Load the value into the destination register.
6733 */
6734#ifdef RT_ARCH_AMD64
6735 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
6736
6737#elif defined(RT_ARCH_ARM64)
6738 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6739 Assert(offCpumCtx < 4096);
6740 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
6741
6742#else
6743# error "Port me!"
6744#endif
6745
6746 return off;
6747}
6748
6749
6750/**
6751 * Common code for CIMPL and AIMPL calls.
6752 *
6753 * These are calls that uses argument variables and such. They should not be
6754 * confused with internal calls required to implement an MC operation,
6755 * like a TLB load and similar.
6756 *
6757 * Upon return all that is left to do is to load any hidden arguments and
6758 * perform the call. All argument variables are freed.
6759 *
6760 * @returns New code buffer offset; throws VBox status code on error.
6761 * @param pReNative The native recompile state.
6762 * @param off The code buffer offset.
6763 * @param cArgs The total nubmer of arguments (includes hidden
6764 * count).
6765 * @param cHiddenArgs The number of hidden arguments. The hidden
6766 * arguments must not have any variable declared for
6767 * them, whereas all the regular arguments must
6768 * (tstIEMCheckMc ensures this).
6769 */
6770DECL_HIDDEN_THROW(uint32_t)
6771iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
6772{
6773#ifdef VBOX_STRICT
6774 /*
6775 * Assert sanity.
6776 */
6777 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
6778 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
6779 for (unsigned i = 0; i < cHiddenArgs; i++)
6780 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
6781 for (unsigned i = cHiddenArgs; i < cArgs; i++)
6782 {
6783 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
6784 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
6785 }
6786 iemNativeRegAssertSanity(pReNative);
6787#endif
6788
6789 /*
6790 * Before we do anything else, go over variables that are referenced and
6791 * make sure they are not in a register.
6792 */
6793 uint32_t bmVars = pReNative->Core.bmVars;
6794 if (bmVars)
6795 {
6796 do
6797 {
6798 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
6799 bmVars &= ~RT_BIT_32(idxVar);
6800
6801 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
6802 {
6803 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
6804 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
6805 {
6806 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6807 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
6808 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
6809 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
6810 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
6811
6812 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6813 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
6814 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
6815 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
6816 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
6817 }
6818 }
6819 } while (bmVars != 0);
6820#if 0 //def VBOX_STRICT
6821 iemNativeRegAssertSanity(pReNative);
6822#endif
6823 }
6824
6825 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
6826
6827 /*
6828 * First, go over the host registers that will be used for arguments and make
6829 * sure they either hold the desired argument or are free.
6830 */
6831 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
6832 {
6833 for (uint32_t i = 0; i < cRegArgs; i++)
6834 {
6835 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
6836 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
6837 {
6838 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
6839 {
6840 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
6841 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
6842 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
6843 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6844 if (uArgNo == i)
6845 { /* prefect */ }
6846 /* The variable allocator logic should make sure this is impossible,
6847 except for when the return register is used as a parameter (ARM,
6848 but not x86). */
6849#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
6850 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
6851 {
6852# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
6853# error "Implement this"
6854# endif
6855 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
6856 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
6857 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
6858 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
6859 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
6860 }
6861#endif
6862 else
6863 {
6864 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
6865
6866 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6867 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
6868 else
6869 {
6870 /* just free it, can be reloaded if used again */
6871 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6872 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
6873 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
6874 }
6875 }
6876 }
6877 else
6878 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
6879 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
6880 }
6881 }
6882#if 0 //def VBOX_STRICT
6883 iemNativeRegAssertSanity(pReNative);
6884#endif
6885 }
6886
6887 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
6888
6889#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
6890 /*
6891 * If there are any stack arguments, make sure they are in their place as well.
6892 *
6893 * We can use IEMNATIVE_CALL_ARG0_GREG as temporary register since we'll (or
6894 * the caller) be loading it later and it must be free (see first loop).
6895 */
6896 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
6897 {
6898 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
6899 {
6900 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6901 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
6902 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6903 {
6904 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
6905 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
6906 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
6907 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6908 }
6909 else
6910 {
6911 /* Use ARG0 as temp for stuff we need registers for. */
6912 switch (pReNative->Core.aVars[idxVar].enmKind)
6913 {
6914 case kIemNativeVarKind_Stack:
6915 {
6916 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6917 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6918 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
6919 iemNativeStackCalcBpDisp(idxStackSlot));
6920 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6921 continue;
6922 }
6923
6924 case kIemNativeVarKind_Immediate:
6925 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
6926 continue;
6927
6928 case kIemNativeVarKind_VarRef:
6929 {
6930 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
6931 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
6932 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
6933 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
6934 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
6935 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
6936 {
6937 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
6938 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
6939 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
6940 }
6941 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
6942 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
6943 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
6944 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6945 continue;
6946 }
6947
6948 case kIemNativeVarKind_GstRegRef:
6949 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
6950 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
6951 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
6952 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6953 continue;
6954
6955 case kIemNativeVarKind_Invalid:
6956 case kIemNativeVarKind_End:
6957 break;
6958 }
6959 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6960 }
6961 }
6962# if 0 //def VBOX_STRICT
6963 iemNativeRegAssertSanity(pReNative);
6964# endif
6965 }
6966#else
6967 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
6968#endif
6969
6970 /*
6971 * Make sure the argument variables are loaded into their respective registers.
6972 *
6973 * We can optimize this by ASSUMING that any register allocations are for
6974 * registeres that have already been loaded and are ready. The previous step
6975 * saw to that.
6976 */
6977 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
6978 {
6979 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
6980 {
6981 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
6982 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
6983 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
6984 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
6985 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
6986 else
6987 {
6988 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6989 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6990 {
6991 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6992 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
6993 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
6994 | RT_BIT_32(idxArgReg);
6995 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
6996 }
6997 else
6998 {
6999 /* Use ARG0 as temp for stuff we need registers for. */
7000 switch (pReNative->Core.aVars[idxVar].enmKind)
7001 {
7002 case kIemNativeVarKind_Stack:
7003 {
7004 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7005 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7006 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
7007 continue;
7008 }
7009
7010 case kIemNativeVarKind_Immediate:
7011 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
7012 continue;
7013
7014 case kIemNativeVarKind_VarRef:
7015 {
7016 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7017 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7018 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7019 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7020 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7021 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7022 {
7023 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7024 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7025 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7026 }
7027 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7028 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7029 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
7030 continue;
7031 }
7032
7033 case kIemNativeVarKind_GstRegRef:
7034 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
7035 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7036 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7037 continue;
7038
7039 case kIemNativeVarKind_Invalid:
7040 case kIemNativeVarKind_End:
7041 break;
7042 }
7043 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7044 }
7045 }
7046 }
7047#if 0 //def VBOX_STRICT
7048 iemNativeRegAssertSanity(pReNative);
7049#endif
7050 }
7051#ifdef VBOX_STRICT
7052 else
7053 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7054 {
7055 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
7056 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
7057 }
7058#endif
7059
7060 /*
7061 * Free all argument variables (simplified).
7062 * Their lifetime always expires with the call they are for.
7063 */
7064 /** @todo Make the python script check that arguments aren't used after
7065 * IEM_MC_CALL_XXXX. */
7066 /** @todo There is a special with IEM_MC_MEM_MAP_U16_RW and friends requiring
7067 * a IEM_MC_MEM_COMMIT_AND_UNMAP_RW after a AIMPL call typically with
7068 * an argument value. There is also some FPU stuff. */
7069 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
7070 {
7071 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7072 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7073
7074 /* no need to free registers: */
7075 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
7076 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
7077 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
7078 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
7079 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
7080 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
7081
7082 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
7083 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7084 iemNativeVarFreeStackSlots(pReNative, idxVar);
7085 }
7086 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7087
7088 /*
7089 * Flush volatile registers as we make the call.
7090 */
7091 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
7092
7093 return off;
7094}
7095
7096
7097/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
7098DECL_HIDDEN_THROW(uint32_t)
7099iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
7100 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
7101
7102{
7103 /*
7104 * Do all the call setup and cleanup.
7105 */
7106 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
7107
7108 /*
7109 * Load the two or three hidden arguments.
7110 */
7111#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7112 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7113 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7114 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
7115#else
7116 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7117 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
7118#endif
7119
7120 /*
7121 * Make the call and check the return code.
7122 *
7123 * Shadow PC copies are always flushed here, other stuff depends on flags.
7124 * Segment and general purpose registers are explictily flushed via the
7125 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
7126 * macros.
7127 */
7128 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
7129#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7130 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7131#endif
7132 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
7133 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
7134 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
7135 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7136
7137 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7138}
7139
7140
7141#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7142 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
7143
7144/** Emits code for IEM_MC_CALL_CIMPL_1. */
7145DECL_INLINE_THROW(uint32_t)
7146iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7147 uintptr_t pfnCImpl, uint8_t idxArg0)
7148{
7149 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7150 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
7151}
7152
7153
7154#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7155 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
7156
7157/** Emits code for IEM_MC_CALL_CIMPL_2. */
7158DECL_INLINE_THROW(uint32_t)
7159iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7160 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
7161{
7162 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7163 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7164 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
7165}
7166
7167
7168#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7169 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7170 (uintptr_t)a_pfnCImpl, a0, a1, a2)
7171
7172/** Emits code for IEM_MC_CALL_CIMPL_3. */
7173DECL_INLINE_THROW(uint32_t)
7174iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7175 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7176{
7177 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7178 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7179 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7180 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
7181}
7182
7183
7184#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
7185 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7186 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
7187
7188/** Emits code for IEM_MC_CALL_CIMPL_4. */
7189DECL_INLINE_THROW(uint32_t)
7190iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7191 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7192{
7193 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7194 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7195 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7196 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7197 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
7198}
7199
7200
7201#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
7202 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7203 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
7204
7205/** Emits code for IEM_MC_CALL_CIMPL_4. */
7206DECL_INLINE_THROW(uint32_t)
7207iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7208 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
7209{
7210 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7211 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7212 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7213 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7214 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
7215 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
7216}
7217
7218
7219/** Recompiler debugging: Flush guest register shadow copies. */
7220#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
7221
7222
7223
7224/*********************************************************************************************************************************
7225* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
7226*********************************************************************************************************************************/
7227
7228/**
7229 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
7230 */
7231DECL_INLINE_THROW(uint32_t)
7232iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7233 uintptr_t pfnAImpl, uint8_t cArgs)
7234{
7235 if (idxVarRc != UINT8_MAX)
7236 {
7237 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
7238 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
7239 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
7240 }
7241
7242 /*
7243 * Do all the call setup and cleanup.
7244 */
7245 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
7246
7247 /*
7248 * Make the call and update the return code variable if we've got one.
7249 */
7250 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7251 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
7252 {
7253pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
7254 iemNativeVarSetRegister(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
7255 }
7256
7257 return off;
7258}
7259
7260
7261
7262#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
7263 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
7264
7265#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
7266 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
7267
7268/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
7269DECL_INLINE_THROW(uint32_t)
7270iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
7271{
7272 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
7273}
7274
7275
7276#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
7277 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
7278
7279#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
7280 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
7281
7282/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
7283DECL_INLINE_THROW(uint32_t)
7284iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
7285{
7286 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7287 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
7288}
7289
7290
7291#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
7292 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
7293
7294#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
7295 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
7296
7297/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
7298DECL_INLINE_THROW(uint32_t)
7299iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7300 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
7301{
7302 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7303 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7304 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
7305}
7306
7307
7308#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
7309 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
7310
7311#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
7312 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
7313
7314/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
7315DECL_INLINE_THROW(uint32_t)
7316iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7317 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7318{
7319 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7320 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7321 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7322 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
7323}
7324
7325
7326#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
7327 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7328
7329#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
7330 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7331
7332/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
7333DECL_INLINE_THROW(uint32_t)
7334iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7335 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7336{
7337 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7338 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7339 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7340 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
7341 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
7342}
7343
7344
7345
7346/*********************************************************************************************************************************
7347* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
7348*********************************************************************************************************************************/
7349
7350#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
7351 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
7352
7353#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7354 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
7355
7356#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7357 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
7358
7359#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7360 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
7361
7362
7363/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
7364 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
7365DECL_INLINE_THROW(uint32_t)
7366iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
7367{
7368 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7369 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7370 Assert(iGRegEx < 20);
7371
7372 /* Same discussion as in iemNativeEmitFetchGregU16 */
7373 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7374 kIemNativeGstRegUse_ReadOnly);
7375
7376 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7377 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7378
7379 /* The value is zero-extended to the full 64-bit host register width. */
7380 if (iGRegEx < 16)
7381 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7382 else
7383 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7384
7385 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7386 return off;
7387}
7388
7389
7390#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7391 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
7392
7393#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7394 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
7395
7396#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7397 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
7398
7399/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
7400DECL_INLINE_THROW(uint32_t)
7401iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
7402{
7403 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7404 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7405 Assert(iGRegEx < 20);
7406
7407 /* Same discussion as in iemNativeEmitFetchGregU16 */
7408 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7409 kIemNativeGstRegUse_ReadOnly);
7410
7411 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7412 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7413
7414 if (iGRegEx < 16)
7415 {
7416 switch (cbSignExtended)
7417 {
7418 case sizeof(uint16_t):
7419 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7420 break;
7421 case sizeof(uint32_t):
7422 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7423 break;
7424 case sizeof(uint64_t):
7425 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7426 break;
7427 default: AssertFailed(); break;
7428 }
7429 }
7430 else
7431 {
7432 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7433 switch (cbSignExtended)
7434 {
7435 case sizeof(uint16_t):
7436 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7437 break;
7438 case sizeof(uint32_t):
7439 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7440 break;
7441 case sizeof(uint64_t):
7442 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7443 break;
7444 default: AssertFailed(); break;
7445 }
7446 }
7447
7448 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7449 return off;
7450}
7451
7452
7453
7454#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
7455 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
7456
7457#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
7458 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7459
7460#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
7461 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7462
7463/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
7464DECL_INLINE_THROW(uint32_t)
7465iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7466{
7467 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7468 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7469 Assert(iGReg < 16);
7470
7471 /*
7472 * We can either just load the low 16-bit of the GPR into a host register
7473 * for the variable, or we can do so via a shadow copy host register. The
7474 * latter will avoid having to reload it if it's being stored later, but
7475 * will waste a host register if it isn't touched again. Since we don't
7476 * know what going to happen, we choose the latter for now.
7477 */
7478 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7479 kIemNativeGstRegUse_ReadOnly);
7480
7481 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7482 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7483 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7484
7485 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7486 return off;
7487}
7488
7489
7490#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
7491 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7492
7493#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
7494 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7495
7496/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
7497DECL_INLINE_THROW(uint32_t)
7498iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
7499{
7500 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7501 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7502 Assert(iGReg < 16);
7503
7504 /*
7505 * We can either just load the low 16-bit of the GPR into a host register
7506 * for the variable, or we can do so via a shadow copy host register. The
7507 * latter will avoid having to reload it if it's being stored later, but
7508 * will waste a host register if it isn't touched again. Since we don't
7509 * know what going to happen, we choose the latter for now.
7510 */
7511 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7512 kIemNativeGstRegUse_ReadOnly);
7513
7514 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7515 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7516 if (cbSignExtended == sizeof(uint32_t))
7517 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7518 else
7519 {
7520 Assert(cbSignExtended == sizeof(uint64_t));
7521 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7522 }
7523
7524 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7525 return off;
7526}
7527
7528
7529#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
7530 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
7531
7532#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
7533 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
7534
7535/** Emits code for IEM_MC_FETCH_GREG_U32. */
7536DECL_INLINE_THROW(uint32_t)
7537iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7538{
7539 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7540 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
7541 Assert(iGReg < 16);
7542
7543 /*
7544 * We can either just load the low 16-bit of the GPR into a host register
7545 * for the variable, or we can do so via a shadow copy host register. The
7546 * latter will avoid having to reload it if it's being stored later, but
7547 * will waste a host register if it isn't touched again. Since we don't
7548 * know what going to happen, we choose the latter for now.
7549 */
7550 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7551 kIemNativeGstRegUse_ReadOnly);
7552
7553 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7554 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7555 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
7556
7557 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7558 return off;
7559}
7560
7561
7562#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
7563 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
7564
7565/** Emits code for IEM_MC_FETCH_GREG_U32. */
7566DECL_INLINE_THROW(uint32_t)
7567iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
7568{
7569 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7570 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
7571 Assert(iGReg < 16);
7572
7573 /*
7574 * We can either just load the low 32-bit of the GPR into a host register
7575 * for the variable, or we can do so via a shadow copy host register. The
7576 * latter will avoid having to reload it if it's being stored later, but
7577 * will waste a host register if it isn't touched again. Since we don't
7578 * know what going to happen, we choose the latter for now.
7579 */
7580 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7581 kIemNativeGstRegUse_ReadOnly);
7582
7583 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7584 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7585 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
7586
7587 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7588 return off;
7589}
7590
7591
7592#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
7593 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
7594
7595#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
7596 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
7597
7598/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
7599 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
7600DECL_INLINE_THROW(uint32_t)
7601iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
7602{
7603 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7604 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
7605 Assert(iGReg < 16);
7606
7607 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7608 kIemNativeGstRegUse_ReadOnly);
7609
7610 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7611 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7612 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
7613 /** @todo name the register a shadow one already? */
7614
7615 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7616 return off;
7617}
7618
7619
7620
7621/*********************************************************************************************************************************
7622* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
7623*********************************************************************************************************************************/
7624
7625#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
7626 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
7627
7628/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
7629DECL_INLINE_THROW(uint32_t)
7630iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
7631{
7632 Assert(iGRegEx < 20);
7633 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7634 kIemNativeGstRegUse_ForUpdate);
7635#ifdef RT_ARCH_AMD64
7636 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
7637
7638 /* To the lowest byte of the register: mov r8, imm8 */
7639 if (iGRegEx < 16)
7640 {
7641 if (idxGstTmpReg >= 8)
7642 pbCodeBuf[off++] = X86_OP_REX_B;
7643 else if (idxGstTmpReg >= 4)
7644 pbCodeBuf[off++] = X86_OP_REX;
7645 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
7646 pbCodeBuf[off++] = u8Value;
7647 }
7648 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
7649 else if (idxGstTmpReg < 4)
7650 {
7651 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
7652 pbCodeBuf[off++] = u8Value;
7653 }
7654 else
7655 {
7656 /* ror reg64, 8 */
7657 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
7658 pbCodeBuf[off++] = 0xc1;
7659 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
7660 pbCodeBuf[off++] = 8;
7661
7662 /* mov reg8, imm8 */
7663 if (idxGstTmpReg >= 8)
7664 pbCodeBuf[off++] = X86_OP_REX_B;
7665 else if (idxGstTmpReg >= 4)
7666 pbCodeBuf[off++] = X86_OP_REX;
7667 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
7668 pbCodeBuf[off++] = u8Value;
7669
7670 /* rol reg64, 8 */
7671 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
7672 pbCodeBuf[off++] = 0xc1;
7673 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
7674 pbCodeBuf[off++] = 8;
7675 }
7676
7677#elif defined(RT_ARCH_ARM64)
7678 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
7679 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7680 if (iGRegEx < 16)
7681 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
7682 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
7683 else
7684 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
7685 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
7686 iemNativeRegFreeTmp(pReNative, idxImmReg);
7687
7688#else
7689# error "Port me!"
7690#endif
7691
7692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7693
7694 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
7695
7696 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7697 return off;
7698}
7699
7700
7701#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
7702 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
7703
7704/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
7705DECL_INLINE_THROW(uint32_t)
7706iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
7707{
7708 Assert(iGRegEx < 20);
7709 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
7710
7711 /*
7712 * If it's a constant value (unlikely) we treat this as a
7713 * IEM_MC_STORE_GREG_U8_CONST statement.
7714 */
7715 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
7716 { /* likely */ }
7717 else
7718 {
7719 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
7720 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7721 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
7722 }
7723
7724 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7725 kIemNativeGstRegUse_ForUpdate);
7726 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off, true /*fInitialized*/);
7727
7728#ifdef RT_ARCH_AMD64
7729 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
7730 if (iGRegEx < 16)
7731 {
7732 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7733 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
7734 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
7735 else if (idxGstTmpReg >= 4)
7736 pbCodeBuf[off++] = X86_OP_REX;
7737 pbCodeBuf[off++] = 0x8a;
7738 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
7739 }
7740 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
7741 else if (idxGstTmpReg < 4 && idxVarReg < 4)
7742 {
7743 /** @todo test this. */
7744 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
7745 pbCodeBuf[off++] = 0x8a;
7746 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
7747 }
7748 else
7749 {
7750 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
7751
7752 /* ror reg64, 8 */
7753 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
7754 pbCodeBuf[off++] = 0xc1;
7755 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
7756 pbCodeBuf[off++] = 8;
7757
7758 /* mov reg8, reg8(r/m) */
7759 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
7760 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
7761 else if (idxGstTmpReg >= 4)
7762 pbCodeBuf[off++] = X86_OP_REX;
7763 pbCodeBuf[off++] = 0x8a;
7764 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
7765
7766 /* rol reg64, 8 */
7767 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
7768 pbCodeBuf[off++] = 0xc1;
7769 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
7770 pbCodeBuf[off++] = 8;
7771 }
7772
7773#elif defined(RT_ARCH_ARM64)
7774 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
7775 or
7776 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
7777 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7778 if (iGRegEx < 16)
7779 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
7780 else
7781 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
7782
7783#else
7784# error "Port me!"
7785#endif
7786
7787 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7788
7789 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
7790 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7791 return off;
7792}
7793
7794
7795
7796#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
7797 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
7798
7799/** Emits code for IEM_MC_STORE_GREG_U16. */
7800DECL_INLINE_THROW(uint32_t)
7801iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
7802{
7803 Assert(iGReg < 16);
7804 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7805 kIemNativeGstRegUse_ForUpdate);
7806#ifdef RT_ARCH_AMD64
7807 /* mov reg16, imm16 */
7808 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7809 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7810 if (idxGstTmpReg >= 8)
7811 pbCodeBuf[off++] = X86_OP_REX_B;
7812 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
7813 pbCodeBuf[off++] = RT_BYTE1(uValue);
7814 pbCodeBuf[off++] = RT_BYTE2(uValue);
7815
7816#elif defined(RT_ARCH_ARM64)
7817 /* movk xdst, #uValue, lsl #0 */
7818 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7819 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
7820
7821#else
7822# error "Port me!"
7823#endif
7824
7825 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7826
7827 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7828 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7829 return off;
7830}
7831
7832
7833#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
7834 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
7835
7836/** Emits code for IEM_MC_STORE_GREG_U16. */
7837DECL_INLINE_THROW(uint32_t)
7838iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
7839{
7840 Assert(iGReg < 16);
7841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
7842
7843 /*
7844 * If it's a constant value (unlikely) we treat this as a
7845 * IEM_MC_STORE_GREG_U16_CONST statement.
7846 */
7847 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
7848 { /* likely */ }
7849 else
7850 {
7851 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
7852 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7853 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
7854 }
7855
7856 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7857 kIemNativeGstRegUse_ForUpdate);
7858
7859#ifdef RT_ARCH_AMD64
7860 /* mov reg16, reg16 or [mem16] */
7861 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
7862 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7863 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7864 {
7865 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
7866 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
7867 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
7868 pbCodeBuf[off++] = 0x8b;
7869 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
7870 }
7871 else
7872 {
7873 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
7874 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7875 if (idxGstTmpReg >= 8)
7876 pbCodeBuf[off++] = X86_OP_REX_R;
7877 pbCodeBuf[off++] = 0x8b;
7878 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7879 }
7880
7881#elif defined(RT_ARCH_ARM64)
7882 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
7883 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off, true /*fInitialized*/);
7884 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7885 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
7886
7887#else
7888# error "Port me!"
7889#endif
7890
7891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7892
7893 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7894 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7895 return off;
7896}
7897
7898
7899#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
7900 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
7901
7902/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
7903DECL_INLINE_THROW(uint32_t)
7904iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
7905{
7906 Assert(iGReg < 16);
7907 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7908 kIemNativeGstRegUse_ForFullWrite);
7909 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
7910 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7911 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7912 return off;
7913}
7914
7915
7916#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
7917 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
7918
7919/** Emits code for IEM_MC_STORE_GREG_U32. */
7920DECL_INLINE_THROW(uint32_t)
7921iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
7922{
7923 Assert(iGReg < 16);
7924 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
7925
7926 /*
7927 * If it's a constant value (unlikely) we treat this as a
7928 * IEM_MC_STORE_GREG_U32_CONST statement.
7929 */
7930 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
7931 { /* likely */ }
7932 else
7933 {
7934 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
7935 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7936 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
7937 }
7938
7939 /*
7940 * For the rest we allocate a guest register for the variable and writes
7941 * it to the CPUMCTX structure.
7942 */
7943 uint8_t const idxVarReg = iemNativeVarAllocRegisterForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
7944 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7945#ifdef VBOX_STRICT
7946 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
7947#endif
7948 return off;
7949}
7950
7951
7952#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
7953 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
7954
7955/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
7956DECL_INLINE_THROW(uint32_t)
7957iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
7958{
7959 Assert(iGReg < 16);
7960 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7961 kIemNativeGstRegUse_ForFullWrite);
7962 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
7963 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7964 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7965 return off;
7966}
7967
7968
7969#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
7970 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
7971
7972/** Emits code for IEM_MC_STORE_GREG_U64. */
7973DECL_INLINE_THROW(uint32_t)
7974iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
7975{
7976 Assert(iGReg < 16);
7977 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
7978
7979 /*
7980 * If it's a constant value (unlikely) we treat this as a
7981 * IEM_MC_STORE_GREG_U64_CONST statement.
7982 */
7983 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
7984 { /* likely */ }
7985 else
7986 {
7987 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
7988 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7989 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
7990 }
7991
7992 /*
7993 * For the rest we allocate a guest register for the variable and writes
7994 * it to the CPUMCTX structure.
7995 */
7996 uint8_t const idxVarReg = iemNativeVarAllocRegisterForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
7997 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7998 return off;
7999}
8000
8001
8002#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
8003 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
8004
8005/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
8006DECL_INLINE_THROW(uint32_t)
8007iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
8008{
8009 Assert(iGReg < 16);
8010 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8011 kIemNativeGstRegUse_ForUpdate);
8012 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
8013 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8014 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8015 return off;
8016}
8017
8018
8019/*********************************************************************************************************************************
8020* General purpose register manipulation (add, sub). *
8021*********************************************************************************************************************************/
8022
8023#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8024 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8025
8026/** Emits code for IEM_MC_SUB_GREG_U16. */
8027DECL_INLINE_THROW(uint32_t)
8028iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
8029{
8030 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8031 kIemNativeGstRegUse_ForUpdate);
8032
8033#ifdef RT_ARCH_AMD64
8034 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
8035 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8036 if (idxGstTmpReg >= 8)
8037 pbCodeBuf[off++] = X86_OP_REX_B;
8038 if (uSubtrahend)
8039 {
8040 pbCodeBuf[off++] = 0xff; /* dec */
8041 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8042 }
8043 else
8044 {
8045 pbCodeBuf[off++] = 0x81;
8046 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8047 pbCodeBuf[off++] = uSubtrahend;
8048 pbCodeBuf[off++] = 0;
8049 }
8050
8051#else
8052 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8053 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8054
8055 /* sub tmp, gstgrp, uSubtrahend */
8056 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
8057
8058 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8059 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8060
8061 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8062#endif
8063
8064 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8065
8066 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8067
8068 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8069 return off;
8070}
8071
8072
8073#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
8074 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8075
8076#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
8077 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8078
8079/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
8080DECL_INLINE_THROW(uint32_t)
8081iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
8082{
8083 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8084 kIemNativeGstRegUse_ForUpdate);
8085
8086#ifdef RT_ARCH_AMD64
8087 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8088 if (f64Bit)
8089 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8090 else if (idxGstTmpReg >= 8)
8091 pbCodeBuf[off++] = X86_OP_REX_B;
8092 if (uSubtrahend == 1)
8093 {
8094 /* dec */
8095 pbCodeBuf[off++] = 0xff;
8096 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8097 }
8098 else if (uSubtrahend < 128)
8099 {
8100 pbCodeBuf[off++] = 0x83; /* sub */
8101 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8102 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8103 }
8104 else
8105 {
8106 pbCodeBuf[off++] = 0x81; /* sub */
8107 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8108 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8109 pbCodeBuf[off++] = 0;
8110 pbCodeBuf[off++] = 0;
8111 pbCodeBuf[off++] = 0;
8112 }
8113
8114#else
8115 /* sub tmp, gstgrp, uSubtrahend */
8116 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8117 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
8118
8119#endif
8120
8121 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8122
8123 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8124
8125 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8126 return off;
8127}
8128
8129
8130
8131/*********************************************************************************************************************************
8132* EFLAGS *
8133*********************************************************************************************************************************/
8134
8135#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
8136 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
8137
8138/** Handles IEM_MC_FETCH_EFLAGS. */
8139DECL_INLINE_THROW(uint32_t)
8140iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8141{
8142 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8143 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8144
8145 uint8_t const idxReg = iemNativeVarAllocRegister(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
8146 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8147 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8148}
8149
8150
8151#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
8152 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
8153
8154/** Handles IEM_MC_COMMIT_EFLAGS. */
8155DECL_INLINE_THROW(uint32_t)
8156iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8157{
8158 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8159 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8160
8161 uint8_t const idxReg = iemNativeVarAllocRegister(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
8162
8163#ifdef VBOX_STRICT
8164 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
8165 off = iemNativeEmitJnzToFixed(pReNative, off, 1);
8166 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
8167
8168 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
8169 off = iemNativeEmitJzToFixed(pReNative, off, 1);
8170 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
8171#endif
8172
8173 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8174 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
8175}
8176
8177
8178
8179/*********************************************************************************************************************************
8180* Register references. *
8181*********************************************************************************************************************************/
8182
8183#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
8184 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
8185
8186#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGReg) \
8187 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
8188
8189/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
8190DECL_INLINE_THROW(uint32_t)
8191iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
8192{
8193 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8194 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8195 Assert(iGRegEx < 20);
8196
8197 if (iGRegEx < 16)
8198 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8199 else
8200 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
8201
8202 /* If we've delayed writing back the register value, flush it now. */
8203 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8204
8205 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8206 if (!fConst)
8207 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
8208
8209 return off;
8210}
8211
8212#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
8213 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
8214
8215#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
8216 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
8217
8218#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
8219 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
8220
8221#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
8222 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
8223
8224#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
8225 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
8226
8227#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
8228 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
8229
8230#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
8231 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
8232
8233#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
8234 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
8235
8236#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
8237 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
8238
8239#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
8240 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
8241
8242/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
8243DECL_INLINE_THROW(uint32_t)
8244iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
8245{
8246 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8247 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8248 Assert(iGReg < 16);
8249
8250 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
8251
8252 /* If we've delayed writing back the register value, flush it now. */
8253 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
8254
8255 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8256 if (!fConst)
8257 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
8258
8259 return off;
8260}
8261
8262
8263#define IEM_MC_REF_EFLAGS(a_pEFlags) \
8264 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
8265
8266/** Handles IEM_MC_REF_EFLAGS. */
8267DECL_INLINE_THROW(uint32_t)
8268iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
8269{
8270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8271 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8272
8273 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
8274
8275 /* If we've delayed writing back the register value, flush it now. */
8276 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
8277
8278 /* If there is a shadow copy of guest EFLAGS, flush it now. */
8279 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
8280
8281 return off;
8282}
8283
8284
8285/*********************************************************************************************************************************
8286* Effective Address Calculation *
8287*********************************************************************************************************************************/
8288#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
8289 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
8290
8291/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
8292 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
8293DECL_INLINE_THROW(uint32_t)
8294iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8295 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
8296{
8297 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
8298
8299 /*
8300 * Handle the disp16 form with no registers first.
8301 *
8302 * Convert to an immediate value, as that'll delay the register allocation
8303 * and assignment till the memory access / call / whatever and we can use
8304 * a more appropriate register (or none at all).
8305 */
8306 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
8307 {
8308 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
8309 return off;
8310 }
8311
8312 /* Determin the displacment. */
8313 uint16_t u16EffAddr;
8314 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
8315 {
8316 case 0: u16EffAddr = 0; break;
8317 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
8318 case 2: u16EffAddr = u16Disp; break;
8319 default: AssertFailedStmt(u16EffAddr = 0);
8320 }
8321
8322 /* Determine the registers involved. */
8323 uint8_t idxGstRegBase;
8324 uint8_t idxGstRegIndex;
8325 switch (bRm & X86_MODRM_RM_MASK)
8326 {
8327 case 0:
8328 idxGstRegBase = X86_GREG_xBX;
8329 idxGstRegIndex = X86_GREG_xSI;
8330 break;
8331 case 1:
8332 idxGstRegBase = X86_GREG_xBX;
8333 idxGstRegIndex = X86_GREG_xDI;
8334 break;
8335 case 2:
8336 idxGstRegBase = X86_GREG_xBP;
8337 idxGstRegIndex = X86_GREG_xSI;
8338 break;
8339 case 3:
8340 idxGstRegBase = X86_GREG_xBP;
8341 idxGstRegIndex = X86_GREG_xDI;
8342 break;
8343 case 4:
8344 idxGstRegBase = X86_GREG_xSI;
8345 idxGstRegIndex = UINT8_MAX;
8346 break;
8347 case 5:
8348 idxGstRegBase = X86_GREG_xDI;
8349 idxGstRegIndex = UINT8_MAX;
8350 break;
8351 case 6:
8352 idxGstRegBase = X86_GREG_xBP;
8353 idxGstRegIndex = UINT8_MAX;
8354 break;
8355#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
8356 default:
8357#endif
8358 case 7:
8359 idxGstRegBase = X86_GREG_xBX;
8360 idxGstRegIndex = UINT8_MAX;
8361 break;
8362 }
8363
8364 /*
8365 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
8366 */
8367 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
8368 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
8369 kIemNativeGstRegUse_ReadOnly);
8370 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
8371 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
8372 kIemNativeGstRegUse_ReadOnly)
8373 : UINT8_MAX;
8374#ifdef RT_ARCH_AMD64
8375 if (idxRegIndex == UINT8_MAX)
8376 {
8377 if (u16EffAddr == 0)
8378 {
8379 /* movxz ret, base */
8380 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
8381 }
8382 else
8383 {
8384 /* lea ret32, [base64 + disp32] */
8385 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
8386 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8387 if (idxRegRet >= 8 || idxRegBase >= 8)
8388 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
8389 pbCodeBuf[off++] = 0x8d;
8390 if (idxRegBase != X86_GREG_x12 /*SIB*/)
8391 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
8392 else
8393 {
8394 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
8395 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
8396 }
8397 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
8398 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
8399 pbCodeBuf[off++] = 0;
8400 pbCodeBuf[off++] = 0;
8401 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8402
8403 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
8404 }
8405 }
8406 else
8407 {
8408 /* lea ret32, [index64 + base64 (+ disp32)] */
8409 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
8410 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8411 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
8412 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
8413 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
8414 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
8415 pbCodeBuf[off++] = 0x8d;
8416 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
8417 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
8418 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
8419 if (bMod == X86_MOD_MEM4)
8420 {
8421 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
8422 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
8423 pbCodeBuf[off++] = 0;
8424 pbCodeBuf[off++] = 0;
8425 }
8426 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8427 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
8428 }
8429
8430#elif defined(RT_ARCH_ARM64)
8431 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8432 if (u16EffAddr == 0)
8433 {
8434 if (idxRegIndex == UINT8_MAX)
8435 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
8436 else
8437 {
8438 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
8439 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
8440 }
8441 }
8442 else
8443 {
8444 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
8445 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
8446 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
8447 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
8448 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
8449 else
8450 {
8451 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
8452 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
8453 }
8454 if (idxRegIndex != UINT8_MAX)
8455 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
8456 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
8457 }
8458
8459#else
8460# error "port me"
8461#endif
8462
8463 if (idxRegIndex != UINT8_MAX)
8464 iemNativeRegFreeTmp(pReNative, idxRegIndex);
8465 iemNativeRegFreeTmp(pReNative, idxRegBase);
8466 return off;
8467}
8468
8469
8470#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
8471 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
8472
8473/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
8474 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
8475DECL_INLINE_THROW(uint32_t)
8476iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8477 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
8478{
8479 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
8480
8481 /*
8482 * Handle the disp32 form with no registers first.
8483 *
8484 * Convert to an immediate value, as that'll delay the register allocation
8485 * and assignment till the memory access / call / whatever and we can use
8486 * a more appropriate register (or none at all).
8487 */
8488 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
8489 {
8490 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
8491 return off;
8492 }
8493
8494 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
8495 uint32_t u32EffAddr = 0;
8496 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
8497 {
8498 case 0: break;
8499 case 1: u32EffAddr = (int8_t)u32Disp; break;
8500 case 2: u32EffAddr = u32Disp; break;
8501 default: AssertFailed();
8502 }
8503
8504 /* Get the register (or SIB) value. */
8505 uint8_t idxGstRegBase = UINT8_MAX;
8506 uint8_t idxGstRegIndex = UINT8_MAX;
8507 uint8_t cShiftIndex = 0;
8508 switch (bRm & X86_MODRM_RM_MASK)
8509 {
8510 case 0: idxGstRegBase = X86_GREG_xAX; break;
8511 case 1: idxGstRegBase = X86_GREG_xCX; break;
8512 case 2: idxGstRegBase = X86_GREG_xDX; break;
8513 case 3: idxGstRegBase = X86_GREG_xBX; break;
8514 case 4: /* SIB */
8515 {
8516 /* index /w scaling . */
8517 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
8518 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
8519 {
8520 case 0: idxGstRegIndex = X86_GREG_xAX; break;
8521 case 1: idxGstRegIndex = X86_GREG_xCX; break;
8522 case 2: idxGstRegIndex = X86_GREG_xDX; break;
8523 case 3: idxGstRegIndex = X86_GREG_xBX; break;
8524 case 4: cShiftIndex = 0; /*no index*/ break;
8525 case 5: idxGstRegIndex = X86_GREG_xBP; break;
8526 case 6: idxGstRegIndex = X86_GREG_xSI; break;
8527 case 7: idxGstRegIndex = X86_GREG_xDI; break;
8528 }
8529
8530 /* base */
8531 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
8532 {
8533 case 0: idxGstRegBase = X86_GREG_xAX; break;
8534 case 1: idxGstRegBase = X86_GREG_xCX; break;
8535 case 2: idxGstRegBase = X86_GREG_xDX; break;
8536 case 3: idxGstRegBase = X86_GREG_xBX; break;
8537 case 4:
8538 idxGstRegBase = X86_GREG_xSP;
8539 u32EffAddr += uSibAndRspOffset >> 8;
8540 break;
8541 case 5:
8542 if ((bRm & X86_MODRM_MOD_MASK) != 0)
8543 idxGstRegBase = X86_GREG_xBP;
8544 else
8545 {
8546 Assert(u32EffAddr == 0);
8547 u32EffAddr = u32Disp;
8548 }
8549 break;
8550 case 6: idxGstRegBase = X86_GREG_xSI; break;
8551 case 7: idxGstRegBase = X86_GREG_xDI; break;
8552 }
8553 break;
8554 }
8555 case 5: idxGstRegBase = X86_GREG_xBP; break;
8556 case 6: idxGstRegBase = X86_GREG_xSI; break;
8557 case 7: idxGstRegBase = X86_GREG_xDI; break;
8558 }
8559
8560 /*
8561 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
8562 * the start of the function.
8563 */
8564 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
8565 {
8566 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
8567 return off;
8568 }
8569
8570 /*
8571 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
8572 */
8573 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
8574 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
8575 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
8576 kIemNativeGstRegUse_ReadOnly);
8577 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
8578 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
8579 kIemNativeGstRegUse_ReadOnly);
8580
8581 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
8582 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
8583 {
8584 idxRegBase = idxRegIndex;
8585 idxRegIndex = UINT8_MAX;
8586 }
8587
8588#ifdef RT_ARCH_AMD64
8589 if (idxRegIndex == UINT8_MAX)
8590 {
8591 if (u32EffAddr == 0)
8592 {
8593 /* mov ret, base */
8594 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
8595 }
8596 else
8597 {
8598 /* lea ret32, [base64 + disp32] */
8599 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
8600 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8601 if (idxRegRet >= 8 || idxRegBase >= 8)
8602 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
8603 pbCodeBuf[off++] = 0x8d;
8604 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
8605 if (idxRegBase != X86_GREG_x12 /*SIB*/)
8606 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
8607 else
8608 {
8609 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
8610 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
8611 }
8612 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
8613 if (bMod == X86_MOD_MEM4)
8614 {
8615 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
8616 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
8617 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
8618 }
8619 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8620 }
8621 }
8622 else
8623 {
8624 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
8625 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8626 if (idxRegBase == UINT8_MAX)
8627 {
8628 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
8629 if (idxRegRet >= 8 || idxRegIndex >= 8)
8630 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
8631 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
8632 pbCodeBuf[off++] = 0x8d;
8633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
8634 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
8635 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
8636 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
8637 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
8638 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
8639 }
8640 else
8641 {
8642 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
8643 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
8644 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
8645 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
8646 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
8647 pbCodeBuf[off++] = 0x8d;
8648 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
8649 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
8650 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
8651 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
8652 if (bMod != X86_MOD_MEM0)
8653 {
8654 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
8655 if (bMod == X86_MOD_MEM4)
8656 {
8657 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
8658 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
8659 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
8660 }
8661 }
8662 }
8663 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8664 }
8665
8666#elif defined(RT_ARCH_ARM64)
8667 if (u32EffAddr == 0)
8668 {
8669 if (idxRegIndex == UINT8_MAX)
8670 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
8671 else if (idxRegBase == UINT8_MAX)
8672 {
8673 if (cShiftIndex == 0)
8674 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
8675 else
8676 {
8677 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8678 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
8679 }
8680 }
8681 else
8682 {
8683 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8684 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
8685 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
8686 }
8687 }
8688 else
8689 {
8690 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
8691 {
8692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8693 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
8694 }
8695 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
8696 {
8697 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8698 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
8699 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
8700 }
8701 else
8702 {
8703 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
8704 if (idxRegBase != UINT8_MAX)
8705 {
8706 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8707 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
8708 }
8709 }
8710 if (idxRegIndex != UINT8_MAX)
8711 {
8712 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8713 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
8714 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
8715 }
8716 }
8717
8718#else
8719# error "port me"
8720#endif
8721
8722 if (idxRegIndex != UINT8_MAX)
8723 iemNativeRegFreeTmp(pReNative, idxRegIndex);
8724 if (idxRegBase != UINT8_MAX)
8725 iemNativeRegFreeTmp(pReNative, idxRegBase);
8726 return off;
8727}
8728
8729
8730#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
8731 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff)
8732
8733#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
8734 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 64)
8735
8736#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
8737 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 32)
8738
8739
8740
8741/*********************************************************************************************************************************
8742* Memory fetches and stores common *
8743*********************************************************************************************************************************/
8744
8745typedef enum IEMNATIVEMITMEMOP
8746{
8747 kIemNativeEmitMemOp_Store = 0,
8748 kIemNativeEmitMemOp_Fetch,
8749 kIemNativeEmitMemOp_Fetch_Zx_U16,
8750 kIemNativeEmitMemOp_Fetch_Zx_U32,
8751 kIemNativeEmitMemOp_Fetch_Zx_U64,
8752 kIemNativeEmitMemOp_Fetch_Sx_U16,
8753 kIemNativeEmitMemOp_Fetch_Sx_U32,
8754 kIemNativeEmitMemOp_Fetch_Sx_U64
8755} IEMNATIVEMITMEMOP;
8756
8757/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
8758 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
8759 * (with iSegReg = UINT8_MAX). */
8760DECL_INLINE_THROW(uint32_t)
8761iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
8762 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
8763 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
8764{
8765 /*
8766 * Assert sanity.
8767 */
8768 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8769 Assert( enmOp != kIemNativeEmitMemOp_Store
8770 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
8771 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
8772 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8773 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
8774 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
8775 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8776 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8777 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
8778 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8779#ifdef VBOX_STRICT
8780 if (iSegReg == UINT8_MAX)
8781 {
8782 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8783 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8784 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8785 switch (cbMem)
8786 {
8787 case 1:
8788 Assert( pfnFunction
8789 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
8790 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
8791 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
8792 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
8793 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
8794 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
8795 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
8796 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
8797 : UINT64_C(0xc000b000a0009000) ));
8798 break;
8799 case 2:
8800 Assert( pfnFunction
8801 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
8802 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
8803 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
8804 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
8805 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
8806 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
8807 : UINT64_C(0xc000b000a0009000) ));
8808 break;
8809 case 4:
8810 Assert( pfnFunction
8811 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
8812 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
8813 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
8814 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
8815 : UINT64_C(0xc000b000a0009000) ));
8816 break;
8817 case 8:
8818 Assert( pfnFunction
8819 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
8820 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
8821 : UINT64_C(0xc000b000a0009000) ));
8822 break;
8823 }
8824 }
8825 else
8826 {
8827 Assert(iSegReg < 6);
8828 switch (cbMem)
8829 {
8830 case 1:
8831 Assert( pfnFunction
8832 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
8833 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
8834 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
8835 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
8836 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
8837 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
8838 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
8839 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
8840 : UINT64_C(0xc000b000a0009000) ));
8841 break;
8842 case 2:
8843 Assert( pfnFunction
8844 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
8845 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
8846 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
8847 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
8848 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
8849 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
8850 : UINT64_C(0xc000b000a0009000) ));
8851 break;
8852 case 4:
8853 Assert( pfnFunction
8854 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
8855 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
8856 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
8857 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
8858 : UINT64_C(0xc000b000a0009000) ));
8859 break;
8860 case 8:
8861 Assert( pfnFunction
8862 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
8863 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
8864 : UINT64_C(0xc000b000a0009000) ));
8865 break;
8866 }
8867 }
8868#endif
8869
8870#ifdef VBOX_STRICT
8871 /*
8872 * Check that the fExec flags we've got make sense.
8873 */
8874 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8875#endif
8876
8877 /*
8878 * To keep things simple we have to commit any pending writes first as we
8879 * may end up making calls.
8880 */
8881 /** @todo we could postpone this till we make the call and reload the
8882 * registers after returning from the call. Not sure if that's sensible or
8883 * not, though. */
8884 off = iemNativeRegFlushPendingWrites(pReNative, off);
8885
8886 /*
8887 * Move/spill/flush stuff out of call-volatile registers.
8888 * This is the easy way out. We could contain this to the tlb-miss branch
8889 * by saving and restoring active stuff here.
8890 */
8891 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8892 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8893
8894 /*
8895 * Define labels and allocate the result register (trying for the return
8896 * register if we can).
8897 */
8898 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8899 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8900 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8901 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX /* special case value storing below */
8902 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8903 ? iemNativeVarSetRegister(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, off)
8904 : iemNativeVarAllocRegister(pReNative, idxVarValue, &off);
8905
8906 /*
8907 * First we try to go via the TLB.
8908 */
8909//pReNative->pInstrBuf[off++] = 0xcc;
8910 /** @todo later. */
8911 RT_NOREF(fAlignMask, cbMem);
8912
8913 /*
8914 * Call helper to do the fetching.
8915 * We flush all guest register shadow copies here.
8916 */
8917 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8918
8919#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8920 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8921#else
8922 RT_NOREF(idxInstr);
8923#endif
8924
8925 uint8_t idxRegArgValue;
8926 if (iSegReg == UINT8_MAX)
8927 idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
8928 else
8929 {
8930 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
8931 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8932 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
8933
8934 idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
8935 }
8936
8937 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
8938 if (enmOp == kIemNativeEmitMemOp_Store)
8939 {
8940 if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
8941 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
8942 else
8943 {
8944 uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
8945 if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
8946 {
8947 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
8948 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
8949 }
8950 else
8951 {
8952 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
8953 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8954 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
8955 }
8956 }
8957 }
8958
8959 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
8960 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
8961 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
8962 pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
8963 else
8964 {
8965 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
8966 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
8967 {
8968 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
8969 if (!offDisp)
8970 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
8971 else
8972 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
8973 }
8974 else
8975 {
8976 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
8977 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8978 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
8979 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
8980 if (offDisp)
8981 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
8982 }
8983 }
8984
8985 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8986 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8987
8988 /* Done setting up parameters, make the call. */
8989 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8990
8991 /*
8992 * Put the result in the right register if this is a fetch.
8993 */
8994 if (enmOp != kIemNativeEmitMemOp_Store)
8995 {
8996 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
8997 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
8998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
8999 }
9000
9001 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9002
9003 return off;
9004}
9005
9006
9007
9008/*********************************************************************************************************************************
9009* Memory fetches (IEM_MEM_FETCH_XXX). *
9010*********************************************************************************************************************************/
9011
9012/* 8-bit segmented: */
9013#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
9014 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
9015 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
9016 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9017
9018#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
9019 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9020 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
9021 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9022
9023#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9025 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
9026 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9027
9028#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9029 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9030 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9031 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9032
9033#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
9034 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9035 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
9036 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
9037
9038#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9039 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9040 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
9041 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
9042
9043#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9044 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9045 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9046 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
9047
9048/* 16-bit segmented: */
9049#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
9050 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9051 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
9052 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
9053
9054#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
9055 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9056 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
9057 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
9058
9059#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9061 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
9062 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
9063
9064#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9066 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9067 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
9068
9069#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9070 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9071 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
9072 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
9073
9074#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9076 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9077 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
9078
9079
9080/* 32-bit segmented: */
9081#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
9082 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9083 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
9084 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
9085
9086#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
9087 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
9088 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
9089 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
9090
9091#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9092 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9093 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9094 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
9095
9096#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9097 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9098 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9099 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
9100
9101
9102/* 64-bit segmented: */
9103#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
9104 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
9105 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
9106 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
9107
9108
9109
9110/* 8-bit flat: */
9111#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
9112 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
9113 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
9114 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
9115
9116#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
9117 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
9118 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
9119 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
9120
9121#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
9122 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9123 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
9124 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
9125
9126#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
9127 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9128 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9129 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
9130
9131#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
9132 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
9133 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
9134 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
9135
9136#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
9137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9138 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
9139 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
9140
9141#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
9142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9143 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9144 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
9145
9146
9147/* 16-bit flat: */
9148#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
9149 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
9150 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
9151 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
9152
9153#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
9154 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
9155 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
9156 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
9157
9158#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
9159 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9160 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
9161 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
9162
9163#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
9164 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9165 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9166 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
9167
9168#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
9169 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9170 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
9171 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
9172
9173#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
9174 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9175 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9176 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
9177
9178/* 32-bit flat: */
9179#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
9180 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9181 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
9182 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
9183
9184#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
9185 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9186 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
9187 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
9188
9189#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
9190 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9191 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9192 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
9193
9194#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
9195 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9196 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9197 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
9198
9199/* 64-bit flat: */
9200#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
9201 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9202 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
9203 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
9204
9205
9206
9207/*********************************************************************************************************************************
9208* Memory stores (IEM_MEM_STORE_XXX). *
9209*********************************************************************************************************************************/
9210
9211#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
9212 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
9213 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
9214 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
9215
9216#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
9217 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
9218 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
9219 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
9220
9221#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
9222 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
9223 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
9224 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
9225
9226#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
9227 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
9228 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
9229 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
9230
9231
9232#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
9233 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
9234 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
9235 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
9236
9237#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
9238 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
9239 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
9240 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
9241
9242#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
9243 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
9244 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
9245 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
9246
9247#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
9248 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
9249 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
9250 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
9251
9252
9253#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
9254 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9255 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
9256
9257#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
9258 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9259 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
9260
9261#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
9262 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9263 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
9264
9265#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
9266 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9267 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
9268
9269
9270#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
9271 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9272 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
9273
9274#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
9275 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9276 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
9277
9278#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
9279 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9280 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
9281
9282#define IEM_MC_STORE_MEM_FLAT_U64_CONST(_GCPtrMem, a_u64ConstValue) \
9283 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9284 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
9285
9286/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
9287 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
9288DECL_INLINE_THROW(uint32_t)
9289iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
9290 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
9291{
9292 /*
9293 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
9294 * to do the grunt work.
9295 */
9296 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
9297 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
9298 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
9299 pfnFunction, idxInstr);
9300 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
9301 return off;
9302}
9303
9304
9305
9306/*********************************************************************************************************************************
9307* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
9308*********************************************************************************************************************************/
9309
9310#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9311 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9312 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
9313 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
9314
9315#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9316 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9317 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
9318 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
9319
9320#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9321 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9322 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
9323 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
9324
9325
9326#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9327 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9328 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9329 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
9330
9331#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9332 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9333 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9334 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9335
9336#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9337 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9338 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9339 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
9340
9341#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9342 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
9343 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9344 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9345
9346
9347#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9348 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9349 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9350 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
9351
9352#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9353 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9354 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9355 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9356
9357#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9358 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9359 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9360 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
9361
9362#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9363 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
9364 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9365 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9366
9367
9368#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9369 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9370 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9371 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
9372
9373#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9374 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9375 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9376 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9377
9378#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9379 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9380 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9381 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
9382
9383#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9384 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
9385 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9386 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9387
9388
9389#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9390 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9391 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9392 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
9393
9394#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9395 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9396 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
9397 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
9398
9399
9400#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9401 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9402 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9403 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9404
9405#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9406 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9407 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9408 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9409
9410#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9411 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9412 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9413 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9414
9415
9416
9417#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9418 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9419 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
9420 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9421
9422#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9423 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9424 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
9425 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9426
9427#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9428 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9429 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
9430 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9431
9432
9433#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9434 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9435 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9436 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9437
9438#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9439 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9440 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9441 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9442
9443#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9444 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9445 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9446 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9447
9448#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9449 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9450 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9451 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9452
9453
9454#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9455 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9456 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9457 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9458
9459#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9460 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9461 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9462 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9463
9464#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9465 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9466 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9467 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9468
9469#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9470 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9471 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9472 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9473
9474
9475#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9476 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9477 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9478 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9479
9480#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9481 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9482 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9483 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9484
9485#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9486 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9487 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9488 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9489
9490#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9492 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9493 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9494
9495
9496#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9497 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9498 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9499 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9500
9501#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9502 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9503 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
9504 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9505
9506
9507#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9508 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9509 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9510 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9511
9512#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9513 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9514 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9515 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9516
9517#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9518 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9519 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9520 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9521
9522
9523DECL_INLINE_THROW(uint32_t)
9524iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9525 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
9526 uintptr_t pfnFunction, uint8_t idxInstr)
9527{
9528 /*
9529 * Assert sanity.
9530 */
9531 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9532 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
9533 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
9534 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9535
9536 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9537 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
9538 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
9539 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9540
9541 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9542 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
9543 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
9544 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9545
9546 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9547
9548 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9549
9550#ifdef VBOX_STRICT
9551# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9552 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9553 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9554 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
9555 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9556
9557 if (iSegReg == UINT8_MAX)
9558 {
9559 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9560 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9561 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9562 switch (cbMem)
9563 {
9564 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
9565 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
9566 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
9567 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
9568 case 10:
9569 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9570 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9571 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9572 break;
9573 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
9574# if 0
9575 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
9576 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
9577# endif
9578 default: AssertFailed(); break;
9579 }
9580 }
9581 else
9582 {
9583 Assert(iSegReg < 6);
9584 switch (cbMem)
9585 {
9586 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
9587 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
9588 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
9589 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
9590 case 10:
9591 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9592 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9593 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9594 break;
9595 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
9596# if 0
9597 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
9598 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
9599# endif
9600 default: AssertFailed(); break;
9601 }
9602 }
9603# undef IEM_MAP_HLP_FN
9604#endif
9605
9606#ifdef VBOX_STRICT
9607 /*
9608 * Check that the fExec flags we've got make sense.
9609 */
9610 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9611#endif
9612
9613 /*
9614 * To keep things simple we have to commit any pending writes first as we
9615 * may end up making calls.
9616 */
9617 /** @todo we could postpone this till we make the call and reload the
9618 * registers after returning from the call. Not sure if that's sensible or
9619 * not, though. */
9620 off = iemNativeRegFlushPendingWrites(pReNative, off);
9621
9622 /*
9623 * Move/spill/flush stuff out of call-volatile registers.
9624 * This is the easy way out. We could contain this to the tlb-miss branch
9625 * by saving and restoring active stuff here.
9626 */
9627 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9628 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9629
9630 /*
9631 * Define labels and allocate the result register (trying for the return
9632 * register if we can - which we of course can, given the above call).
9633 */
9634 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9635 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
9636 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9637 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9638 ? iemNativeVarSetRegister(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, off)
9639 : iemNativeVarAllocRegister(pReNative, idxVarMem, &off);
9640
9641 /*
9642 * First we try to go via the TLB.
9643 */
9644//pReNative->pInstrBuf[off++] = 0xcc;
9645 /** @todo later. */
9646 RT_NOREF(fAccess, fAlignMask, cbMem);
9647
9648 /*
9649 * Call helper to do the fetching.
9650 * We flush all guest register shadow copies here.
9651 */
9652 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
9653
9654#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9655 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9656#else
9657 RT_NOREF(idxInstr);
9658#endif
9659
9660 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9661 if (iSegReg != UINT8_MAX)
9662 {
9663 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9664 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9665 }
9666
9667 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem */
9668 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem);
9669
9670 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo */
9671 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9672 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo, true /*fFlushShadows*/);
9673
9674 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9675 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9676
9677 /* Done setting up parameters, make the call. */
9678 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9679
9680 /*
9681 * Put the result in the right register .
9682 */
9683 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
9684 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9685 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9686
9687 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9688
9689 return off;
9690}
9691
9692
9693#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9694 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
9695 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9696
9697#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9698 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
9699 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9700
9701#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9702 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
9703 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9704
9705DECL_INLINE_THROW(uint32_t)
9706iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9707 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9708{
9709 /*
9710 * Assert sanity.
9711 */
9712 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9713 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
9714 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9715 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9716#ifdef VBOX_STRICT
9717 switch (fAccess & IEM_ACCESS_TYPE_MASK)
9718 {
9719 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9720 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9721 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9722 default: AssertFailed();
9723 }
9724#endif
9725
9726 /*
9727 * To keep things simple we have to commit any pending writes first as we
9728 * may end up making calls (there shouldn't be any at this point, so this
9729 * is just for consistency).
9730 */
9731 /** @todo we could postpone this till we make the call and reload the
9732 * registers after returning from the call. Not sure if that's sensible or
9733 * not, though. */
9734 off = iemNativeRegFlushPendingWrites(pReNative, off);
9735
9736 /*
9737 * Move/spill/flush stuff out of call-volatile registers.
9738 */
9739 /** @todo save+restore active registers and maybe guest shadows in miss
9740 * scenario. */
9741 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9742
9743 /*
9744 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9745 * to call the unmap helper function.
9746 */
9747//pReNative->pInstrBuf[off++] = 0xcc;
9748 RT_NOREF(fAccess);
9749
9750#ifdef RT_ARCH_AMD64
9751 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
9752 {
9753 /* test byte [rbp - xxx], 0ffh */
9754 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9755 pbCodeBuf[off++] = 0xf6;
9756 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
9757 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9758 pbCodeBuf[off++] = 0xff;
9759 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9760 }
9761 else
9762#endif
9763 {
9764 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxVarUnmapInfo, &off);
9765 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9766 }
9767 uint32_t const offJmpFixup = off;
9768 off = iemNativeEmitJzToFixed(pReNative, off, 0);
9769
9770 /*
9771 * Call the unmap helper function.
9772 */
9773#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9774 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9775#else
9776 RT_NOREF(idxInstr);
9777#endif
9778
9779 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo */
9780 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo);
9781
9782 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9783 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9784
9785 /* Done setting up parameters, make the call. */
9786 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9787
9788 /*
9789 * Done, just fixup the jump for the non-call case.
9790 */
9791 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9792
9793 return off;
9794}
9795
9796
9797
9798/*********************************************************************************************************************************
9799* State and Exceptions *
9800*********************************************************************************************************************************/
9801
9802#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9803#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9804
9805#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9806#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9807#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9808
9809#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9810#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9811#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9812
9813
9814DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9815{
9816 /** @todo this needs a lot more work later. */
9817 RT_NOREF(pReNative, fForChange);
9818 return off;
9819}
9820
9821
9822
9823/*********************************************************************************************************************************
9824* Builtin functions *
9825*********************************************************************************************************************************/
9826
9827/**
9828 * Built-in function that calls a C-implemention function taking zero arguments.
9829 */
9830static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
9831{
9832 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
9833 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
9834 uint64_t const fGstShwFlush = (uint8_t)pCallEntry->auParams[2];
9835 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
9836}
9837
9838
9839/**
9840 * Built-in function that checks for pending interrupts that can be delivered or
9841 * forced action flags.
9842 *
9843 * This triggers after the completion of an instruction, so EIP is already at
9844 * the next instruction. If an IRQ or important FF is pending, this will return
9845 * a non-zero status that stops TB execution.
9846 */
9847static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
9848{
9849 RT_NOREF(pCallEntry);
9850
9851 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
9852 and I'm too lazy to create a 'Fixed' version of that one. */
9853 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
9854 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
9855
9856 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
9857
9858 /* Again, we need to load the extended EFLAGS before we actually need them
9859 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
9860 loaded them inside the check, as the shadow state would not be correct
9861 when the code branches before the load. Ditto PC. */
9862 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
9863 kIemNativeGstRegUse_ReadOnly);
9864
9865 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
9866
9867 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9868
9869 /*
9870 * Start by checking the local forced actions of the EMT we're on for IRQs
9871 * and other FFs that needs servicing.
9872 */
9873 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
9874 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
9875 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
9876 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
9877 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
9878 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
9879 | VMCPU_FF_TLB_FLUSH
9880 | VMCPU_FF_UNHALT ),
9881 true /*fSetFlags*/);
9882 /* If we end up with ZERO in idxTmpReg there is nothing to do.*/
9883 uint32_t const offFixupJumpToVmCheck1 = off;
9884 off = iemNativeEmitJzToFixed(pReNative, off, 0);
9885
9886 /* Some relevant FFs are set, but if's only APIC or/and PIC being set,
9887 these may be supressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
9888 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
9889 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
9890 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
9891 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
9892
9893 /* So, it's only interrupt releated FFs and we need to see if IRQs are being
9894 suppressed by the CPU or not. */
9895 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
9896 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
9897 idxLabelReturnBreak);
9898
9899 /* We've got shadow flags set, so we must check that the PC they are valid
9900 for matches our current PC value. */
9901 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
9902 * a register. */
9903 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
9904 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
9905
9906 /*
9907 * Now check the force flags of the VM.
9908 */
9909 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
9910 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
9911 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
9912 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
9913 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
9914 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
9915
9916 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
9917
9918 /*
9919 * We're good, no IRQs or FFs pending.
9920 */
9921 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9922 iemNativeRegFreeTmp(pReNative, idxEflReg);
9923 iemNativeRegFreeTmp(pReNative, idxPcReg);
9924
9925 return off;
9926}
9927
9928
9929/**
9930 * Built-in function checks if IEMCPU::fExec has the expected value.
9931 */
9932static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
9933{
9934 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
9935 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9936
9937 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
9938 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
9939 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
9940 kIemNativeLabelType_ReturnBreak);
9941 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9942 return off;
9943}
9944
9945
9946
9947/*********************************************************************************************************************************
9948* The native code generator functions for each MC block. *
9949*********************************************************************************************************************************/
9950
9951
9952/*
9953 * Include g_apfnIemNativeRecompileFunctions and associated functions.
9954 *
9955 * This should probably live in it's own file later, but lets see what the
9956 * compile times turn out to be first.
9957 */
9958#include "IEMNativeFunctions.cpp.h"
9959
9960
9961
9962/*********************************************************************************************************************************
9963* Recompiler Core. *
9964*********************************************************************************************************************************/
9965
9966
9967/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
9968static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
9969{
9970 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
9971 pDis->cbCachedInstr += cbMaxRead;
9972 RT_NOREF(cbMinRead);
9973 return VERR_NO_DATA;
9974}
9975
9976
9977/**
9978 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
9979 * @returns pszBuf.
9980 * @param fFlags The flags.
9981 * @param pszBuf The output buffer.
9982 * @param cbBuf The output buffer size. At least 32 bytes.
9983 */
9984DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
9985{
9986 Assert(cbBuf >= 32);
9987 static RTSTRTUPLE const s_aModes[] =
9988 {
9989 /* [00] = */ { RT_STR_TUPLE("16BIT") },
9990 /* [01] = */ { RT_STR_TUPLE("32BIT") },
9991 /* [02] = */ { RT_STR_TUPLE("!2!") },
9992 /* [03] = */ { RT_STR_TUPLE("!3!") },
9993 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
9994 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
9995 /* [06] = */ { RT_STR_TUPLE("!6!") },
9996 /* [07] = */ { RT_STR_TUPLE("!7!") },
9997 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
9998 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
9999 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
10000 /* [0b] = */ { RT_STR_TUPLE("!b!") },
10001 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
10002 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
10003 /* [0e] = */ { RT_STR_TUPLE("!e!") },
10004 /* [0f] = */ { RT_STR_TUPLE("!f!") },
10005 /* [10] = */ { RT_STR_TUPLE("!10!") },
10006 /* [11] = */ { RT_STR_TUPLE("!11!") },
10007 /* [12] = */ { RT_STR_TUPLE("!12!") },
10008 /* [13] = */ { RT_STR_TUPLE("!13!") },
10009 /* [14] = */ { RT_STR_TUPLE("!14!") },
10010 /* [15] = */ { RT_STR_TUPLE("!15!") },
10011 /* [16] = */ { RT_STR_TUPLE("!16!") },
10012 /* [17] = */ { RT_STR_TUPLE("!17!") },
10013 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
10014 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
10015 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
10016 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
10017 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
10018 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
10019 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
10020 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
10021 };
10022 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
10023 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
10024 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
10025
10026 pszBuf[off++] = ' ';
10027 pszBuf[off++] = 'C';
10028 pszBuf[off++] = 'P';
10029 pszBuf[off++] = 'L';
10030 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
10031 Assert(off < 32);
10032
10033 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
10034
10035 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
10036 {
10037 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
10038 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
10039 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
10040 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
10041 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
10042 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
10043 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
10044 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
10045 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
10046 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
10047 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
10048 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
10049 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
10050 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
10051 };
10052 if (fFlags)
10053 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
10054 if (s_aFlags[i].fFlag & fFlags)
10055 {
10056 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
10057 pszBuf[off++] = ' ';
10058 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
10059 off += s_aFlags[i].cchName;
10060 fFlags &= ~s_aFlags[i].fFlag;
10061 if (!fFlags)
10062 break;
10063 }
10064 pszBuf[off] = '\0';
10065
10066 return pszBuf;
10067}
10068
10069
10070DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
10071{
10072 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
10073
10074 char szDisBuf[512];
10075 DISSTATE Dis;
10076 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
10077 uint32_t const cNative = pTb->Native.cInstructions;
10078 uint32_t offNative = 0;
10079#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10080 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
10081#endif
10082 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
10083 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
10084 : DISCPUMODE_64BIT;
10085#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
10086 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
10087#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
10088 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
10089#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
10090# error "Port me"
10091#else
10092 csh hDisasm = ~(size_t)0;
10093# if defined(RT_ARCH_AMD64)
10094 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
10095# elif defined(RT_ARCH_ARM64)
10096 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
10097# else
10098# error "Port me"
10099# endif
10100 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
10101#endif
10102
10103 /*
10104 * Print TB info.
10105 */
10106 pHlp->pfnPrintf(pHlp,
10107 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
10108 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
10109 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
10110 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
10111#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10112 if (pDbgInfo && pDbgInfo->cEntries > 1)
10113 {
10114 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
10115
10116 /*
10117 * This disassembly is driven by the debug info which follows the native
10118 * code and indicates when it starts with the next guest instructions,
10119 * where labels are and such things.
10120 */
10121 uint32_t idxThreadedCall = 0;
10122 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
10123 uint8_t idxRange = UINT8_MAX;
10124 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
10125 uint32_t offRange = 0;
10126 uint32_t offOpcodes = 0;
10127 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
10128 uint32_t const cDbgEntries = pDbgInfo->cEntries;
10129 uint32_t iDbgEntry = 1;
10130 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
10131
10132 while (offNative < cNative)
10133 {
10134 /* If we're at or have passed the point where the next chunk of debug
10135 info starts, process it. */
10136 if (offDbgNativeNext <= offNative)
10137 {
10138 offDbgNativeNext = UINT32_MAX;
10139 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
10140 {
10141 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
10142 {
10143 case kIemTbDbgEntryType_GuestInstruction:
10144 {
10145 /* Did the exec flag change? */
10146 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
10147 {
10148 pHlp->pfnPrintf(pHlp,
10149 " fExec change %#08x -> %#08x %s\n",
10150 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
10151 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
10152 szDisBuf, sizeof(szDisBuf)));
10153 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
10154 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
10155 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
10156 : DISCPUMODE_64BIT;
10157 }
10158
10159 /* New opcode range? We need to fend up a spurious debug info entry here for cases
10160 where the compilation was aborted before the opcode was recorded and the actual
10161 instruction was translated to a threaded call. This may happen when we run out
10162 of ranges, or when some complicated interrupts/FFs are found to be pending or
10163 similar. So, we just deal with it here rather than in the compiler code as it
10164 is a lot simpler to do up here. */
10165 if ( idxRange == UINT8_MAX
10166 || idxRange >= cRanges
10167 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
10168 {
10169 idxRange += 1;
10170 if (idxRange < cRanges)
10171 offRange = 0;
10172 else
10173 continue;
10174 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
10175 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
10176 + (pTb->aRanges[idxRange].idxPhysPage == 0
10177 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
10178 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
10179 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
10180 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
10181 pTb->aRanges[idxRange].idxPhysPage);
10182 }
10183
10184 /* Disassemble the instruction. */
10185 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
10186 uint32_t cbInstr = 1;
10187 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
10188 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
10189 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
10190 if (RT_SUCCESS(rc))
10191 {
10192 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10193 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10194 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10195 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10196
10197 static unsigned const s_offMarker = 55;
10198 static char const s_szMarker[] = " ; <--- guest";
10199 if (cch < s_offMarker)
10200 {
10201 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
10202 cch = s_offMarker;
10203 }
10204 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
10205 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
10206
10207 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
10208 }
10209 else
10210 {
10211 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
10212 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
10213 cbInstr = 1;
10214 }
10215 GCPhysPc += cbInstr;
10216 offOpcodes += cbInstr;
10217 offRange += cbInstr;
10218 continue;
10219 }
10220
10221 case kIemTbDbgEntryType_ThreadedCall:
10222 pHlp->pfnPrintf(pHlp,
10223 " Call #%u to %s (%u args)%s\n",
10224 idxThreadedCall,
10225 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
10226 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
10227 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
10228 idxThreadedCall++;
10229 continue;
10230
10231 case kIemTbDbgEntryType_GuestRegShadowing:
10232 {
10233 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
10234 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
10235 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
10236 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
10237 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
10238 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
10239 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
10240 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
10241 else
10242 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
10243 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
10244 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
10245 continue;
10246 }
10247
10248 case kIemTbDbgEntryType_Label:
10249 {
10250 const char *pszName = "what_the_fudge";
10251 const char *pszComment = "";
10252 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
10253 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
10254 {
10255 case kIemNativeLabelType_Return:
10256 pszName = "Return";
10257 break;
10258 case kIemNativeLabelType_ReturnBreak:
10259 pszName = "ReturnBreak";
10260 break;
10261 case kIemNativeLabelType_ReturnWithFlags:
10262 pszName = "ReturnWithFlags";
10263 break;
10264 case kIemNativeLabelType_NonZeroRetOrPassUp:
10265 pszName = "NonZeroRetOrPassUp";
10266 break;
10267 case kIemNativeLabelType_RaiseGp0:
10268 pszName = "RaiseGp0";
10269 break;
10270 case kIemNativeLabelType_If:
10271 pszName = "If";
10272 fNumbered = true;
10273 break;
10274 case kIemNativeLabelType_Else:
10275 pszName = "Else";
10276 fNumbered = true;
10277 pszComment = " ; regs state restored pre-if-block";
10278 break;
10279 case kIemNativeLabelType_Endif:
10280 pszName = "Endif";
10281 fNumbered = true;
10282 break;
10283 case kIemNativeLabelType_CheckIrq:
10284 pszName = "CheckIrq_CheckVM";
10285 fNumbered = true;
10286 break;
10287 case kIemNativeLabelType_TlbMiss:
10288 pszName = "CheckIrq_TlbMiss";
10289 fNumbered = true;
10290 break;
10291 case kIemNativeLabelType_TlbDone:
10292 pszName = "CheckIrq_TlbDone";
10293 fNumbered = true;
10294 break;
10295 case kIemNativeLabelType_Invalid:
10296 case kIemNativeLabelType_End:
10297 break;
10298 }
10299 if (fNumbered)
10300 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
10301 else
10302 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
10303 continue;
10304 }
10305
10306 case kIemTbDbgEntryType_NativeOffset:
10307 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
10308 Assert(offDbgNativeNext > offNative);
10309 break;
10310
10311 default:
10312 AssertFailed();
10313 }
10314 iDbgEntry++;
10315 break;
10316 }
10317 }
10318
10319 /*
10320 * Disassemble the next native instruction.
10321 */
10322 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10323# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10324 uint32_t cbInstr = sizeof(paNative[0]);
10325 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10326 if (RT_SUCCESS(rc))
10327 {
10328# if defined(RT_ARCH_AMD64)
10329 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10330 {
10331 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10332 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10333 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
10334 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10335 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10336 uInfo & 0x8000 ? " - recompiled" : "");
10337 else
10338 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10339 }
10340 else
10341# endif
10342 {
10343# ifdef RT_ARCH_AMD64
10344 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10345 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10346 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10347 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10348# elif defined(RT_ARCH_ARM64)
10349 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10350 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10351 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10352# else
10353# error "Port me"
10354# endif
10355 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10356 }
10357 }
10358 else
10359 {
10360# if defined(RT_ARCH_AMD64)
10361 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10362 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10363# elif defined(RT_ARCH_ARM64)
10364 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10365# else
10366# error "Port me"
10367# endif
10368 cbInstr = sizeof(paNative[0]);
10369 }
10370 offNative += cbInstr / sizeof(paNative[0]);
10371
10372# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10373 cs_insn *pInstr;
10374 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10375 (uintptr_t)pNativeCur, 1, &pInstr);
10376 if (cInstrs > 0)
10377 {
10378 Assert(cInstrs == 1);
10379# if defined(RT_ARCH_AMD64)
10380 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10381 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10382# else
10383 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10384 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10385# endif
10386 offNative += pInstr->size / sizeof(*pNativeCur);
10387 cs_free(pInstr, cInstrs);
10388 }
10389 else
10390 {
10391# if defined(RT_ARCH_AMD64)
10392 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10393 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm)));
10394# else
10395 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10396# endif
10397 offNative++;
10398 }
10399# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10400 }
10401 }
10402 else
10403#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
10404 {
10405 /*
10406 * No debug info, just disassemble the x86 code and then the native code.
10407 *
10408 * First the guest code:
10409 */
10410 for (unsigned i = 0; i < pTb->cRanges; i++)
10411 {
10412 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
10413 + (pTb->aRanges[i].idxPhysPage == 0
10414 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
10415 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
10416 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
10417 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
10418 unsigned off = pTb->aRanges[i].offOpcodes;
10419 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
10420 while (off < cbOpcodes)
10421 {
10422 uint32_t cbInstr = 1;
10423 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
10424 &pTb->pabOpcodes[off], cbOpcodes - off,
10425 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
10426 if (RT_SUCCESS(rc))
10427 {
10428 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10429 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10430 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10431 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10432 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
10433 GCPhysPc += cbInstr;
10434 off += cbInstr;
10435 }
10436 else
10437 {
10438 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
10439 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
10440 break;
10441 }
10442 }
10443 }
10444
10445 /*
10446 * Then the native code:
10447 */
10448 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
10449 while (offNative < cNative)
10450 {
10451 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10452# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10453 uint32_t cbInstr = sizeof(paNative[0]);
10454 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10455 if (RT_SUCCESS(rc))
10456 {
10457# if defined(RT_ARCH_AMD64)
10458 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10459 {
10460 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10461 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10462 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
10463 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10464 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10465 uInfo & 0x8000 ? " - recompiled" : "");
10466 else
10467 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10468 }
10469 else
10470# endif
10471 {
10472# ifdef RT_ARCH_AMD64
10473 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10474 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10475 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10476 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10477# elif defined(RT_ARCH_ARM64)
10478 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10479 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10480 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10481# else
10482# error "Port me"
10483# endif
10484 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10485 }
10486 }
10487 else
10488 {
10489# if defined(RT_ARCH_AMD64)
10490 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10491 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10492# else
10493 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10494# endif
10495 cbInstr = sizeof(paNative[0]);
10496 }
10497 offNative += cbInstr / sizeof(paNative[0]);
10498
10499# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10500 cs_insn *pInstr;
10501 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10502 (uintptr_t)pNativeCur, 1, &pInstr);
10503 if (cInstrs > 0)
10504 {
10505 Assert(cInstrs == 1);
10506# if defined(RT_ARCH_AMD64)
10507 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10508 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10509# else
10510 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10511 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10512# endif
10513 offNative += pInstr->size / sizeof(*pNativeCur);
10514 cs_free(pInstr, cInstrs);
10515 }
10516 else
10517 {
10518# if defined(RT_ARCH_AMD64)
10519 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10520 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm)));
10521# else
10522 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10523# endif
10524 offNative++;
10525 }
10526# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10527 }
10528 }
10529
10530#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10531 /* Cleanup. */
10532 cs_close(&hDisasm);
10533#endif
10534}
10535
10536
10537/**
10538 * Recompiles the given threaded TB into a native one.
10539 *
10540 * In case of failure the translation block will be returned as-is.
10541 *
10542 * @returns pTb.
10543 * @param pVCpu The cross context virtual CPU structure of the calling
10544 * thread.
10545 * @param pTb The threaded translation to recompile to native.
10546 */
10547DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10548{
10549 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10550
10551 /*
10552 * The first time thru, we allocate the recompiler state, the other times
10553 * we just need to reset it before using it again.
10554 */
10555 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10556 if (RT_LIKELY(pReNative))
10557 iemNativeReInit(pReNative, pTb);
10558 else
10559 {
10560 pReNative = iemNativeInit(pVCpu, pTb);
10561 AssertReturn(pReNative, pTb);
10562 }
10563
10564 /*
10565 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10566 * for aborting if an error happens.
10567 */
10568 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10569#ifdef LOG_ENABLED
10570 uint32_t const cCallsOrg = cCallsLeft;
10571#endif
10572 uint32_t off = 0;
10573 int rc = VINF_SUCCESS;
10574 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10575 {
10576 /*
10577 * Emit prolog code (fixed).
10578 */
10579 off = iemNativeEmitProlog(pReNative, off);
10580
10581 /*
10582 * Convert the calls to native code.
10583 */
10584#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10585 int32_t iGstInstr = -1;
10586#endif
10587#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10588 uint32_t cThreadedCalls = 0;
10589 uint32_t cRecompiledCalls = 0;
10590#endif
10591 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10592 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10593 while (cCallsLeft-- > 0)
10594 {
10595 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10596
10597 /*
10598 * Debug info and assembly markup.
10599 */
10600 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10601 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10602#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10603 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10604 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10605 {
10606 if (iGstInstr < (int32_t)pTb->cInstructions)
10607 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10608 else
10609 Assert(iGstInstr == pTb->cInstructions);
10610 iGstInstr = pCallEntry->idxInstr;
10611 }
10612 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10613#endif
10614#if defined(VBOX_STRICT)
10615 off = iemNativeEmitMarker(pReNative, off,
10616 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
10617 pCallEntry->enmFunction));
10618#endif
10619#if defined(VBOX_STRICT)
10620 iemNativeRegAssertSanity(pReNative);
10621#endif
10622
10623 /*
10624 * Actual work.
10625 */
10626 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
10627 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "" : "(todo)"));
10628 if (pfnRecom) /** @todo stats on this. */
10629 {
10630 off = pfnRecom(pReNative, off, pCallEntry);
10631 STAM_REL_STATS({cRecompiledCalls++;});
10632 }
10633 else
10634 {
10635 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10636 STAM_REL_STATS({cThreadedCalls++;});
10637 }
10638 Assert(off <= pReNative->cInstrBufAlloc);
10639 Assert(pReNative->cCondDepth == 0);
10640
10641 /*
10642 * Advance.
10643 */
10644 pCallEntry++;
10645 }
10646
10647 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10648 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10649 if (!cThreadedCalls)
10650 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10651
10652 /*
10653 * Emit the epilog code.
10654 */
10655 uint32_t idxReturnLabel;
10656 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10657
10658 /*
10659 * Generate special jump labels.
10660 */
10661 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10662 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10663 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10664 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10665 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
10666 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
10667 }
10668 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10669 {
10670 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10671 return pTb;
10672 }
10673 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10674 Assert(off <= pReNative->cInstrBufAlloc);
10675
10676 /*
10677 * Make sure all labels has been defined.
10678 */
10679 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10680#ifdef VBOX_STRICT
10681 uint32_t const cLabels = pReNative->cLabels;
10682 for (uint32_t i = 0; i < cLabels; i++)
10683 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10684#endif
10685
10686 /*
10687 * Allocate executable memory, copy over the code we've generated.
10688 */
10689 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10690 if (pTbAllocator->pDelayedFreeHead)
10691 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10692
10693 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
10694 AssertReturn(paFinalInstrBuf, pTb);
10695 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10696
10697 /*
10698 * Apply fixups.
10699 */
10700 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10701 uint32_t const cFixups = pReNative->cFixups;
10702 for (uint32_t i = 0; i < cFixups; i++)
10703 {
10704 Assert(paFixups[i].off < off);
10705 Assert(paFixups[i].idxLabel < cLabels);
10706 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10707 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10708 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10709 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10710 switch (paFixups[i].enmType)
10711 {
10712#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10713 case kIemNativeFixupType_Rel32:
10714 Assert(paFixups[i].off + 4 <= off);
10715 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10716 continue;
10717
10718#elif defined(RT_ARCH_ARM64)
10719 case kIemNativeFixupType_RelImm26At0:
10720 {
10721 Assert(paFixups[i].off < off);
10722 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10723 Assert(offDisp >= -262144 && offDisp < 262144);
10724 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10725 continue;
10726 }
10727
10728 case kIemNativeFixupType_RelImm19At5:
10729 {
10730 Assert(paFixups[i].off < off);
10731 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10732 Assert(offDisp >= -262144 && offDisp < 262144);
10733 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10734 continue;
10735 }
10736
10737 case kIemNativeFixupType_RelImm14At5:
10738 {
10739 Assert(paFixups[i].off < off);
10740 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10741 Assert(offDisp >= -8192 && offDisp < 8192);
10742 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10743 continue;
10744 }
10745
10746#endif
10747 case kIemNativeFixupType_Invalid:
10748 case kIemNativeFixupType_End:
10749 break;
10750 }
10751 AssertFailed();
10752 }
10753
10754 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
10755 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10756
10757 /*
10758 * Convert the translation block.
10759 */
10760 RTMemFree(pTb->Thrd.paCalls);
10761 pTb->Native.paInstructions = paFinalInstrBuf;
10762 pTb->Native.cInstructions = off;
10763 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10764#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10765 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
10766 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10767#endif
10768
10769 Assert(pTbAllocator->cThreadedTbs > 0);
10770 pTbAllocator->cThreadedTbs -= 1;
10771 pTbAllocator->cNativeTbs += 1;
10772 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10773
10774#ifdef LOG_ENABLED
10775 /*
10776 * Disassemble to the log if enabled.
10777 */
10778 if (LogIs3Enabled())
10779 {
10780 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10781 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10782# ifdef DEBUG_bird
10783 RTLogFlush(NULL);
10784# endif
10785 }
10786#endif
10787
10788 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10789 return pTb;
10790}
10791
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette