VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 102448

Last change on this file since 102448 was 102447, checked in by vboxsync, 17 months ago

VMM/IEM: movsx & movzx from registers. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 473.7 KB
Line 
1/* $Id: IEMAllN8veRecompiler.cpp 102447 2023-12-04 13:05:53Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incomaptible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs here.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef VBOX_STRICT
132static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
133 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
134static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
135#endif
136#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
138static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
139#endif
140DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
142 IEMNATIVEGSTREG enmGstReg, uint32_t off);
143
144
145/*********************************************************************************************************************************
146* Executable Memory Allocator *
147*********************************************************************************************************************************/
148/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
149 * Use an alternative chunk sub-allocator that does store internal data
150 * in the chunk.
151 *
152 * Using the RTHeapSimple is not practial on newer darwin systems where
153 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
154 * memory. We would have to change the protection of the whole chunk for
155 * every call to RTHeapSimple, which would be rather expensive.
156 *
157 * This alternative implemenation let restrict page protection modifications
158 * to the pages backing the executable memory we just allocated.
159 */
160#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
161/** The chunk sub-allocation unit size in bytes. */
162#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
163/** The chunk sub-allocation unit size as a shift factor. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
165
166#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
167# ifdef IEMNATIVE_USE_GDB_JIT
168# define IEMNATIVE_USE_GDB_JIT_ET_DYN
169
170/** GDB JIT: Code entry. */
171typedef struct GDBJITCODEENTRY
172{
173 struct GDBJITCODEENTRY *pNext;
174 struct GDBJITCODEENTRY *pPrev;
175 uint8_t *pbSymFile;
176 uint64_t cbSymFile;
177} GDBJITCODEENTRY;
178
179/** GDB JIT: Actions. */
180typedef enum GDBJITACTIONS : uint32_t
181{
182 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
183} GDBJITACTIONS;
184
185/** GDB JIT: Descriptor. */
186typedef struct GDBJITDESCRIPTOR
187{
188 uint32_t uVersion;
189 GDBJITACTIONS enmAction;
190 GDBJITCODEENTRY *pRelevant;
191 GDBJITCODEENTRY *pHead;
192 /** Our addition: */
193 GDBJITCODEENTRY *pTail;
194} GDBJITDESCRIPTOR;
195
196/** GDB JIT: Our simple symbol file data. */
197typedef struct GDBJITSYMFILE
198{
199 Elf64_Ehdr EHdr;
200# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Shdr aShdrs[5];
202# else
203 Elf64_Shdr aShdrs[7];
204 Elf64_Phdr aPhdrs[2];
205# endif
206 /** The dwarf ehframe data for the chunk. */
207 uint8_t abEhFrame[512];
208 char szzStrTab[128];
209 Elf64_Sym aSymbols[3];
210# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
211 Elf64_Sym aDynSyms[2];
212 Elf64_Dyn aDyn[6];
213# endif
214} GDBJITSYMFILE;
215
216extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
217extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
218
219/** Init once for g_IemNativeGdbJitLock. */
220static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
221/** Init once for the critical section. */
222static RTCRITSECT g_IemNativeGdbJitLock;
223
224/** GDB reads the info here. */
225GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
226
227/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
228DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
229{
230 ASMNopPause();
231}
232
233/** @callback_method_impl{FNRTONCE} */
234static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
235{
236 RT_NOREF(pvUser);
237 return RTCritSectInit(&g_IemNativeGdbJitLock);
238}
239
240
241# endif /* IEMNATIVE_USE_GDB_JIT */
242
243/**
244 * Per-chunk unwind info for non-windows hosts.
245 */
246typedef struct IEMEXECMEMCHUNKEHFRAME
247{
248# ifdef IEMNATIVE_USE_LIBUNWIND
249 /** The offset of the FDA into abEhFrame. */
250 uintptr_t offFda;
251# else
252 /** 'struct object' storage area. */
253 uint8_t abObject[1024];
254# endif
255# ifdef IEMNATIVE_USE_GDB_JIT
256# if 0
257 /** The GDB JIT 'symbol file' data. */
258 GDBJITSYMFILE GdbJitSymFile;
259# endif
260 /** The GDB JIT list entry. */
261 GDBJITCODEENTRY GdbJitEntry;
262# endif
263 /** The dwarf ehframe data for the chunk. */
264 uint8_t abEhFrame[512];
265} IEMEXECMEMCHUNKEHFRAME;
266/** Pointer to per-chunk info info for non-windows hosts. */
267typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
268#endif
269
270
271/**
272 * An chunk of executable memory.
273 */
274typedef struct IEMEXECMEMCHUNK
275{
276#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
277 /** Number of free items in this chunk. */
278 uint32_t cFreeUnits;
279 /** Hint were to start searching for free space in the allocation bitmap. */
280 uint32_t idxFreeHint;
281#else
282 /** The heap handle. */
283 RTHEAPSIMPLE hHeap;
284#endif
285 /** Pointer to the chunk. */
286 void *pvChunk;
287#ifdef IN_RING3
288 /**
289 * Pointer to the unwind information.
290 *
291 * This is used during C++ throw and longjmp (windows and probably most other
292 * platforms). Some debuggers (windbg) makes use of it as well.
293 *
294 * Windows: This is allocated from hHeap on windows because (at least for
295 * AMD64) the UNWIND_INFO structure address in the
296 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
297 *
298 * Others: Allocated from the regular heap to avoid unnecessary executable data
299 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
300 void *pvUnwindInfo;
301#elif defined(IN_RING0)
302 /** Allocation handle. */
303 RTR0MEMOBJ hMemObj;
304#endif
305} IEMEXECMEMCHUNK;
306/** Pointer to a memory chunk. */
307typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
308
309
310/**
311 * Executable memory allocator for the native recompiler.
312 */
313typedef struct IEMEXECMEMALLOCATOR
314{
315 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
316 uint32_t uMagic;
317
318 /** The chunk size. */
319 uint32_t cbChunk;
320 /** The maximum number of chunks. */
321 uint32_t cMaxChunks;
322 /** The current number of chunks. */
323 uint32_t cChunks;
324 /** Hint where to start looking for available memory. */
325 uint32_t idxChunkHint;
326 /** Statistics: Current number of allocations. */
327 uint32_t cAllocations;
328
329 /** The total amount of memory available. */
330 uint64_t cbTotal;
331 /** Total amount of free memory. */
332 uint64_t cbFree;
333 /** Total amount of memory allocated. */
334 uint64_t cbAllocated;
335
336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
337 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
338 *
339 * Since the chunk size is a power of two and the minimum chunk size is a lot
340 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
341 * require a whole number of uint64_t elements in the allocation bitmap. So,
342 * for sake of simplicity, they are allocated as one continous chunk for
343 * simplicity/laziness. */
344 uint64_t *pbmAlloc;
345 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
346 uint32_t cUnitsPerChunk;
347 /** Number of bitmap elements per chunk (for quickly locating the bitmap
348 * portion corresponding to an chunk). */
349 uint32_t cBitmapElementsPerChunk;
350#else
351 /** @name Tweaks to get 64 byte aligned allocats w/o unnecessary fragmentation.
352 * @{ */
353 /** The size of the heap internal block header. This is used to adjust the
354 * request memory size to make sure there is exacly enough room for a header at
355 * the end of the blocks we allocate before the next 64 byte alignment line. */
356 uint32_t cbHeapBlockHdr;
357 /** The size of initial heap allocation required make sure the first
358 * allocation is correctly aligned. */
359 uint32_t cbHeapAlignTweak;
360 /** The alignment tweak allocation address. */
361 void *pvAlignTweak;
362 /** @} */
363#endif
364
365#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
366 /** Pointer to the array of unwind info running parallel to aChunks (same
367 * allocation as this structure, located after the bitmaps).
368 * (For Windows, the structures must reside in 32-bit RVA distance to the
369 * actual chunk, so they are allocated off the chunk.) */
370 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
371#endif
372
373 /** The allocation chunks. */
374 RT_FLEXIBLE_ARRAY_EXTENSION
375 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
376} IEMEXECMEMALLOCATOR;
377/** Pointer to an executable memory allocator. */
378typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
379
380/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
381#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
382
383
384static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
385
386
387/**
388 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
389 * the heap statistics.
390 */
391static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
392 uint32_t cbReq, uint32_t idxChunk)
393{
394 pExecMemAllocator->cAllocations += 1;
395 pExecMemAllocator->cbAllocated += cbReq;
396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
397 pExecMemAllocator->cbFree -= cbReq;
398#else
399 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
400#endif
401 pExecMemAllocator->idxChunkHint = idxChunk;
402
403#ifdef RT_OS_DARWIN
404 /*
405 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
406 * on darwin. So, we mark the pages returned as read+write after alloc and
407 * expect the caller to call iemExecMemAllocatorReadyForUse when done
408 * writing to the allocation.
409 *
410 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
411 * for details.
412 */
413 /** @todo detect if this is necessary... it wasn't required on 10.15 or
414 * whatever older version it was. */
415 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
416 AssertRC(rc);
417#endif
418
419 return pvRet;
420}
421
422
423#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
424static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
425 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
426{
427 /*
428 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
429 */
430 Assert(!(cToScan & 63));
431 Assert(!(idxFirst & 63));
432 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
433 pbmAlloc += idxFirst / 64;
434
435 /*
436 * Scan the bitmap for cReqUnits of consequtive clear bits
437 */
438 /** @todo This can probably be done more efficiently for non-x86 systems. */
439 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
440 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
441 {
442 uint32_t idxAddBit = 1;
443 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
444 idxAddBit++;
445 if (idxAddBit >= cReqUnits)
446 {
447 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
448
449 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
450 pChunk->cFreeUnits -= cReqUnits;
451 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
452
453 void * const pvRet = (uint8_t *)pChunk->pvChunk
454 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
455
456 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
457 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
458 }
459
460 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
461 }
462 return NULL;
463}
464#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
465
466
467static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
468{
469#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
470 /*
471 * Figure out how much to allocate.
472 */
473 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
474 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
475 {
476 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
477 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
478 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
479 {
480 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
481 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
482 if (pvRet)
483 return pvRet;
484 }
485 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
486 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
487 cReqUnits, idxChunk);
488 }
489#else
490 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
491 if (pvRet)
492 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
493#endif
494 return NULL;
495
496}
497
498
499/**
500 * Allocates @a cbReq bytes of executable memory.
501 *
502 * @returns Pointer to the memory, NULL if out of memory or other problem
503 * encountered.
504 * @param pVCpu The cross context virtual CPU structure of the calling
505 * thread.
506 * @param cbReq How many bytes are required.
507 */
508static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
509{
510 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
511 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
512 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
513
514 /*
515 * Adjust the request size so it'll fit the allocator alignment/whatnot.
516 *
517 * For the RTHeapSimple allocator this means to follow the logic described
518 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
519 * existing chunks if we think we've got sufficient free memory around.
520 *
521 * While for the alternative one we just align it up to a whole unit size.
522 */
523#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
524 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
525#else
526 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
527#endif
528 if (cbReq <= pExecMemAllocator->cbFree)
529 {
530 uint32_t const cChunks = pExecMemAllocator->cChunks;
531 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
532 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
533 {
534 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
535 if (pvRet)
536 return pvRet;
537 }
538 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
539 {
540 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
541 if (pvRet)
542 return pvRet;
543 }
544 }
545
546 /*
547 * Can we grow it with another chunk?
548 */
549 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
550 {
551 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
552 AssertLogRelRCReturn(rc, NULL);
553
554 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
555 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
556 if (pvRet)
557 return pvRet;
558 AssertFailed();
559 }
560
561 /* What now? Prune native translation blocks from the cache? */
562 AssertFailed();
563 return NULL;
564}
565
566
567/** This is a hook that we may need later for changing memory protection back
568 * to readonly+exec */
569static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
570{
571#ifdef RT_OS_DARWIN
572 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
573 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
574 AssertRC(rc); RT_NOREF(pVCpu);
575
576 /*
577 * Flush the instruction cache:
578 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
579 */
580 /* sys_dcache_flush(pv, cb); - not necessary */
581 sys_icache_invalidate(pv, cb);
582#else
583 RT_NOREF(pVCpu, pv, cb);
584#endif
585}
586
587
588/**
589 * Frees executable memory.
590 */
591void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
592{
593 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
594 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
595 Assert(pv);
596#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
597 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
598#else
599 Assert(!((uintptr_t)pv & 63));
600#endif
601
602 /* Align the size as we did when allocating the block. */
603#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
604 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
605#else
606 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
607#endif
608
609 /* Free it / assert sanity. */
610#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
611 uint32_t const cChunks = pExecMemAllocator->cChunks;
612 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
613 bool fFound = false;
614 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
615 {
616 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
617 fFound = offChunk < cbChunk;
618 if (fFound)
619 {
620#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
621 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
622 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
623
624 /* Check that it's valid and free it. */
625 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
626 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
627 for (uint32_t i = 1; i < cReqUnits; i++)
628 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
629 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
630
631 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
632 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
633
634 /* Update the stats. */
635 pExecMemAllocator->cbAllocated -= cb;
636 pExecMemAllocator->cbFree += cb;
637 pExecMemAllocator->cAllocations -= 1;
638 return;
639#else
640 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
641 break;
642#endif
643 }
644 }
645# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
646 AssertFailed();
647# else
648 Assert(fFound);
649# endif
650#endif
651
652#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
653 /* Update stats while cb is freshly calculated.*/
654 pExecMemAllocator->cbAllocated -= cb;
655 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
656 pExecMemAllocator->cAllocations -= 1;
657
658 /* Free it. */
659 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
660#endif
661}
662
663
664
665#ifdef IN_RING3
666# ifdef RT_OS_WINDOWS
667
668/**
669 * Initializes the unwind info structures for windows hosts.
670 */
671static int
672iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
673 void *pvChunk, uint32_t idxChunk)
674{
675 RT_NOREF(pVCpu);
676
677 /*
678 * The AMD64 unwind opcodes.
679 *
680 * This is a program that starts with RSP after a RET instruction that
681 * ends up in recompiled code, and the operations we describe here will
682 * restore all non-volatile registers and bring RSP back to where our
683 * RET address is. This means it's reverse order from what happens in
684 * the prologue.
685 *
686 * Note! Using a frame register approach here both because we have one
687 * and but mainly because the UWOP_ALLOC_LARGE argument values
688 * would be a pain to write initializers for. On the positive
689 * side, we're impervious to changes in the the stack variable
690 * area can can deal with dynamic stack allocations if necessary.
691 */
692 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
693 {
694 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
695 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
696 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
697 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
698 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
699 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
700 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
701 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
702 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
703 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
704 };
705 union
706 {
707 IMAGE_UNWIND_INFO Info;
708 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
709 } s_UnwindInfo =
710 {
711 {
712 /* .Version = */ 1,
713 /* .Flags = */ 0,
714 /* .SizeOfProlog = */ 16, /* whatever */
715 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
716 /* .FrameRegister = */ X86_GREG_xBP,
717 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
718 }
719 };
720 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
721 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
722
723 /*
724 * Calc how much space we need and allocate it off the exec heap.
725 */
726 unsigned const cFunctionEntries = 1;
727 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
728 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
729# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
730 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
731 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
732 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
733# else
734 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
735 - pExecMemAllocator->cbHeapBlockHdr;
736 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
737 32 /*cbAlignment*/);
738# endif
739 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
740 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
741
742 /*
743 * Initialize the structures.
744 */
745 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
746
747 paFunctions[0].BeginAddress = 0;
748 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
749 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
750
751 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
752 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
753
754 /*
755 * Register it.
756 */
757 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
758 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
759
760 return VINF_SUCCESS;
761}
762
763
764# else /* !RT_OS_WINDOWS */
765
766/**
767 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
768 */
769DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
770{
771 if (iValue >= 64)
772 {
773 Assert(iValue < 0x2000);
774 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
775 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
776 }
777 else if (iValue >= 0)
778 *Ptr.pb++ = (uint8_t)iValue;
779 else if (iValue > -64)
780 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
781 else
782 {
783 Assert(iValue > -0x2000);
784 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
785 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
786 }
787 return Ptr;
788}
789
790
791/**
792 * Emits an ULEB128 encoded value (up to 64-bit wide).
793 */
794DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
795{
796 while (uValue >= 0x80)
797 {
798 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
799 uValue >>= 7;
800 }
801 *Ptr.pb++ = (uint8_t)uValue;
802 return Ptr;
803}
804
805
806/**
807 * Emits a CFA rule as register @a uReg + offset @a off.
808 */
809DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
810{
811 *Ptr.pb++ = DW_CFA_def_cfa;
812 Ptr = iemDwarfPutUleb128(Ptr, uReg);
813 Ptr = iemDwarfPutUleb128(Ptr, off);
814 return Ptr;
815}
816
817
818/**
819 * Emits a register (@a uReg) save location:
820 * CFA + @a off * data_alignment_factor
821 */
822DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
823{
824 if (uReg < 0x40)
825 *Ptr.pb++ = DW_CFA_offset | uReg;
826 else
827 {
828 *Ptr.pb++ = DW_CFA_offset_extended;
829 Ptr = iemDwarfPutUleb128(Ptr, uReg);
830 }
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836# if 0 /* unused */
837/**
838 * Emits a register (@a uReg) save location, using signed offset:
839 * CFA + @a offSigned * data_alignment_factor
840 */
841DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
842{
843 *Ptr.pb++ = DW_CFA_offset_extended_sf;
844 Ptr = iemDwarfPutUleb128(Ptr, uReg);
845 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
846 return Ptr;
847}
848# endif
849
850
851/**
852 * Initializes the unwind info section for non-windows hosts.
853 */
854static int
855iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
856 void *pvChunk, uint32_t idxChunk)
857{
858 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
859 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
860
861 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
862
863 /*
864 * Generate the CIE first.
865 */
866# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
867 uint8_t const iDwarfVer = 3;
868# else
869 uint8_t const iDwarfVer = 4;
870# endif
871 RTPTRUNION const PtrCie = Ptr;
872 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
873 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
874 *Ptr.pb++ = iDwarfVer; /* DwARF version */
875 *Ptr.pb++ = 0; /* Augmentation. */
876 if (iDwarfVer >= 4)
877 {
878 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
879 *Ptr.pb++ = 0; /* Segment selector size. */
880 }
881# ifdef RT_ARCH_AMD64
882 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
883# else
884 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
885# endif
886 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
887# ifdef RT_ARCH_AMD64
888 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
889# elif defined(RT_ARCH_ARM64)
890 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
891# else
892# error "port me"
893# endif
894 /* Initial instructions: */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
897 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
898 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
899 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
904# elif defined(RT_ARCH_ARM64)
905# if 1
906 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
907# else
908 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
909# endif
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
922 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
923 /** @todo we we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
924# else
925# error "port me"
926# endif
927 while ((Ptr.u - PtrCie.u) & 3)
928 *Ptr.pb++ = DW_CFA_nop;
929 /* Finalize the CIE size. */
930 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
931
932 /*
933 * Generate an FDE for the whole chunk area.
934 */
935# ifdef IEMNATIVE_USE_LIBUNWIND
936 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
937# endif
938 RTPTRUNION const PtrFde = Ptr;
939 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
940 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
941 Ptr.pu32++;
942 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
943 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this PDE. */
944# if 0 /* not requried for recent libunwind.dylib nor recent libgcc/glib. */
945 *Ptr.pb++ = DW_CFA_nop;
946# endif
947 while ((Ptr.u - PtrFde.u) & 3)
948 *Ptr.pb++ = DW_CFA_nop;
949 /* Finalize the FDE size. */
950 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
951
952 /* Terminator entry. */
953 *Ptr.pu32++ = 0;
954 *Ptr.pu32++ = 0; /* just to be sure... */
955 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
956
957 /*
958 * Register it.
959 */
960# ifdef IEMNATIVE_USE_LIBUNWIND
961 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
962# else
963 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
964 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
965# endif
966
967# ifdef IEMNATIVE_USE_GDB_JIT
968 /*
969 * Now for telling GDB about this (experimental).
970 *
971 * This seems to work best with ET_DYN.
972 */
973 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
974# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
975 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
976 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
977# else
978 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
979 - pExecMemAllocator->cbHeapBlockHdr;
980 GDBJITSYMFILE * const pSymFile = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
981# endif
982 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
983 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
984
985 RT_ZERO(*pSymFile);
986
987 /*
988 * The ELF header:
989 */
990 pSymFile->EHdr.e_ident[0] = ELFMAG0;
991 pSymFile->EHdr.e_ident[1] = ELFMAG1;
992 pSymFile->EHdr.e_ident[2] = ELFMAG2;
993 pSymFile->EHdr.e_ident[3] = ELFMAG3;
994 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
995 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
996 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
997 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
998# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
999 pSymFile->EHdr.e_type = ET_DYN;
1000# else
1001 pSymFile->EHdr.e_type = ET_REL;
1002# endif
1003# ifdef RT_ARCH_AMD64
1004 pSymFile->EHdr.e_machine = EM_AMD64;
1005# elif defined(RT_ARCH_ARM64)
1006 pSymFile->EHdr.e_machine = EM_AARCH64;
1007# else
1008# error "port me"
1009# endif
1010 pSymFile->EHdr.e_version = 1; /*?*/
1011 pSymFile->EHdr.e_entry = 0;
1012# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1013 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1014# else
1015 pSymFile->EHdr.e_phoff = 0;
1016# endif
1017 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1018 pSymFile->EHdr.e_flags = 0;
1019 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1022 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1023# else
1024 pSymFile->EHdr.e_phentsize = 0;
1025 pSymFile->EHdr.e_phnum = 0;
1026# endif
1027 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1028 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1029 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1030
1031 uint32_t offStrTab = 0;
1032#define APPEND_STR(a_szStr) do { \
1033 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1034 offStrTab += sizeof(a_szStr); \
1035 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1036 } while (0)
1037#define APPEND_STR_FMT(a_szStr, ...) do { \
1038 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1039 offStrTab++; \
1040 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1041 } while (0)
1042
1043 /*
1044 * Section headers.
1045 */
1046 /* Section header #0: NULL */
1047 unsigned i = 0;
1048 APPEND_STR("");
1049 RT_ZERO(pSymFile->aShdrs[i]);
1050 i++;
1051
1052 /* Section header: .eh_frame */
1053 pSymFile->aShdrs[i].sh_name = offStrTab;
1054 APPEND_STR(".eh_frame");
1055 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1056 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1057# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1058 pSymFile->aShdrs[i].sh_offset
1059 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1060# else
1061 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1062 pSymFile->aShdrs[i].sh_offset = 0;
1063# endif
1064
1065 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1066 pSymFile->aShdrs[i].sh_link = 0;
1067 pSymFile->aShdrs[i].sh_info = 0;
1068 pSymFile->aShdrs[i].sh_addralign = 1;
1069 pSymFile->aShdrs[i].sh_entsize = 0;
1070 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1071 i++;
1072
1073 /* Section header: .shstrtab */
1074 unsigned const iShStrTab = i;
1075 pSymFile->EHdr.e_shstrndx = iShStrTab;
1076 pSymFile->aShdrs[i].sh_name = offStrTab;
1077 APPEND_STR(".shstrtab");
1078 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1079 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1080# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1081 pSymFile->aShdrs[i].sh_offset
1082 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1083# else
1084 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1085 pSymFile->aShdrs[i].sh_offset = 0;
1086# endif
1087 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1088 pSymFile->aShdrs[i].sh_link = 0;
1089 pSymFile->aShdrs[i].sh_info = 0;
1090 pSymFile->aShdrs[i].sh_addralign = 1;
1091 pSymFile->aShdrs[i].sh_entsize = 0;
1092 i++;
1093
1094 /* Section header: .symbols */
1095 pSymFile->aShdrs[i].sh_name = offStrTab;
1096 APPEND_STR(".symtab");
1097 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1098 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1102 pSymFile->aShdrs[i].sh_link = iShStrTab;
1103 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1104 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1105 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1106 i++;
1107
1108# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1109 /* Section header: .symbols */
1110 pSymFile->aShdrs[i].sh_name = offStrTab;
1111 APPEND_STR(".dynsym");
1112 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1113 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1114 pSymFile->aShdrs[i].sh_offset
1115 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1116 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1117 pSymFile->aShdrs[i].sh_link = iShStrTab;
1118 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1119 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1120 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1121 i++;
1122# endif
1123
1124# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1125 /* Section header: .dynamic */
1126 pSymFile->aShdrs[i].sh_name = offStrTab;
1127 APPEND_STR(".dynamic");
1128 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1129 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1130 pSymFile->aShdrs[i].sh_offset
1131 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1132 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1133 pSymFile->aShdrs[i].sh_link = iShStrTab;
1134 pSymFile->aShdrs[i].sh_info = 0;
1135 pSymFile->aShdrs[i].sh_addralign = 1;
1136 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1137 i++;
1138# endif
1139
1140 /* Section header: .text */
1141 unsigned const iShText = i;
1142 pSymFile->aShdrs[i].sh_name = offStrTab;
1143 APPEND_STR(".text");
1144 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1145 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1146# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1147 pSymFile->aShdrs[i].sh_offset
1148 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1149# else
1150 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1151 pSymFile->aShdrs[i].sh_offset = 0;
1152# endif
1153 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1154 pSymFile->aShdrs[i].sh_link = 0;
1155 pSymFile->aShdrs[i].sh_info = 0;
1156 pSymFile->aShdrs[i].sh_addralign = 1;
1157 pSymFile->aShdrs[i].sh_entsize = 0;
1158 i++;
1159
1160 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1161
1162# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1163 /*
1164 * The program headers:
1165 */
1166 /* Everything in a single LOAD segment: */
1167 i = 0;
1168 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1169 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1170 pSymFile->aPhdrs[i].p_offset
1171 = pSymFile->aPhdrs[i].p_vaddr
1172 = pSymFile->aPhdrs[i].p_paddr = 0;
1173 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1174 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1175 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1176 i++;
1177 /* The .dynamic segment. */
1178 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1179 pSymFile->aPhdrs[i].p_flags = PF_R;
1180 pSymFile->aPhdrs[i].p_offset
1181 = pSymFile->aPhdrs[i].p_vaddr
1182 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1183 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1184 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1185 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1186 i++;
1187
1188 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1189
1190 /*
1191 * The dynamic section:
1192 */
1193 i = 0;
1194 pSymFile->aDyn[i].d_tag = DT_SONAME;
1195 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1196 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1197 i++;
1198 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1199 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1200 i++;
1201 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1202 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1205 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1206 i++;
1207 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1208 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_NULL;
1211 i++;
1212 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1213# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1214
1215 /*
1216 * Symbol tables:
1217 */
1218 /** @todo gdb doesn't seem to really like this ... */
1219 i = 0;
1220 pSymFile->aSymbols[i].st_name = 0;
1221 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1222 pSymFile->aSymbols[i].st_value = 0;
1223 pSymFile->aSymbols[i].st_size = 0;
1224 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1225 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1226# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1227 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1228# endif
1229 i++;
1230
1231 pSymFile->aSymbols[i].st_name = 0;
1232 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1233 pSymFile->aSymbols[i].st_value = 0;
1234 pSymFile->aSymbols[i].st_size = 0;
1235 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1236 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = offStrTab;
1240 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1241# if 0
1242 pSymFile->aSymbols[i].st_shndx = iShText;
1243 pSymFile->aSymbols[i].st_value = 0;
1244# else
1245 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1246 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1247# endif
1248 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1252 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1253 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1254# endif
1255 i++;
1256
1257 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1258 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1259
1260 /*
1261 * The GDB JIT entry and informing GDB.
1262 */
1263 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1264# if 1
1265 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1266# else
1267 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1268# endif
1269
1270 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1271 RTCritSectEnter(&g_IemNativeGdbJitLock);
1272 pEhFrame->GdbJitEntry.pNext = NULL;
1273 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1274 if (__jit_debug_descriptor.pTail)
1275 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1276 else
1277 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1278 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1279 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1280
1281 /* Notify GDB: */
1282 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1283 __jit_debug_register_code();
1284 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1285 RTCritSectLeave(&g_IemNativeGdbJitLock);
1286
1287# else /* !IEMNATIVE_USE_GDB_JIT */
1288 RT_NOREF(pVCpu);
1289# endif /* !IEMNATIVE_USE_GDB_JIT */
1290
1291 return VINF_SUCCESS;
1292}
1293
1294# endif /* !RT_OS_WINDOWS */
1295#endif /* IN_RING3 */
1296
1297
1298/**
1299 * Adds another chunk to the executable memory allocator.
1300 *
1301 * This is used by the init code for the initial allocation and later by the
1302 * regular allocator function when it's out of memory.
1303 */
1304static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1305{
1306 /* Check that we've room for growth. */
1307 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1308 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1309
1310 /* Allocate a chunk. */
1311#ifdef RT_OS_DARWIN
1312 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1313#else
1314 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1315#endif
1316 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1317
1318#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1319 int rc = VINF_SUCCESS;
1320#else
1321 /* Initialize the heap for the chunk. */
1322 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1323 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1324 AssertRC(rc);
1325 if (RT_SUCCESS(rc))
1326 {
1327 /*
1328 * We want the memory to be aligned on 64 byte, so the first time thru
1329 * here we do some exploratory allocations to see how we can achieve this.
1330 * On subsequent runs we only make an initial adjustment allocation, if
1331 * necessary.
1332 *
1333 * Since we own the heap implementation, we know that the internal block
1334 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1335 * so all we need to wrt allocation size adjustments is to add 32 bytes
1336 * to the size, align up by 64 bytes, and subtract 32 bytes.
1337 *
1338 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1339 * which mean 64 bytes on a 64-bit system, so we need to make a 64 byte
1340 * allocation to force subsequent allocations to return 64 byte aligned
1341 * user areas.
1342 */
1343 if (!pExecMemAllocator->cbHeapBlockHdr)
1344 {
1345 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1346 pExecMemAllocator->cbHeapAlignTweak = 64;
1347 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1348 32 /*cbAlignment*/);
1349 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1350
1351 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1352 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1353 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1354 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1355 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1356
1357 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1358 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1359 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1360 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1361 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1362
1363 RTHeapSimpleFree(hHeap, pvTest2);
1364 RTHeapSimpleFree(hHeap, pvTest1);
1365 }
1366 else
1367 {
1368 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1369 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1370 }
1371 if (RT_SUCCESS(rc))
1372#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1373 {
1374 /*
1375 * Add the chunk.
1376 *
1377 * This must be done before the unwind init so windows can allocate
1378 * memory from the chunk when using the alternative sub-allocator.
1379 */
1380 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1381#ifdef IN_RING3
1382 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1383#endif
1384#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1385 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1386#else
1387 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1388 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1389 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1390 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1391#endif
1392
1393 pExecMemAllocator->cChunks = idxChunk + 1;
1394 pExecMemAllocator->idxChunkHint = idxChunk;
1395
1396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1397 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1398 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1399#else
1400 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1401 pExecMemAllocator->cbTotal += cbFree;
1402 pExecMemAllocator->cbFree += cbFree;
1403#endif
1404
1405#ifdef IN_RING3
1406 /*
1407 * Initialize the unwind information (this cannot really fail atm).
1408 * (This sets pvUnwindInfo.)
1409 */
1410 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1411 if (RT_SUCCESS(rc))
1412#endif
1413 {
1414 return VINF_SUCCESS;
1415 }
1416
1417#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1418 /* Just in case the impossible happens, undo the above up: */
1419 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1420 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1421 pExecMemAllocator->cChunks = idxChunk;
1422 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1423 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1424 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1425 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1426#endif
1427 }
1428#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1429 }
1430#endif
1431 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1432 RT_NOREF(pVCpu);
1433 return rc;
1434}
1435
1436
1437/**
1438 * Initializes the executable memory allocator for native recompilation on the
1439 * calling EMT.
1440 *
1441 * @returns VBox status code.
1442 * @param pVCpu The cross context virtual CPU structure of the calling
1443 * thread.
1444 * @param cbMax The max size of the allocator.
1445 * @param cbInitial The initial allocator size.
1446 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1447 * dependent).
1448 */
1449int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1450{
1451 /*
1452 * Validate input.
1453 */
1454 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1455 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1456 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1457 || cbChunk == 0
1458 || ( RT_IS_POWER_OF_TWO(cbChunk)
1459 && cbChunk >= _1M
1460 && cbChunk <= _256M
1461 && cbChunk <= cbMax),
1462 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1463 VERR_OUT_OF_RANGE);
1464
1465 /*
1466 * Adjust/figure out the chunk size.
1467 */
1468 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1469 {
1470 if (cbMax >= _256M)
1471 cbChunk = _64M;
1472 else
1473 {
1474 if (cbMax < _16M)
1475 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1476 else
1477 cbChunk = (uint32_t)cbMax / 4;
1478 if (!RT_IS_POWER_OF_TWO(cbChunk))
1479 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1480 }
1481 }
1482
1483 if (cbChunk > cbMax)
1484 cbMax = cbChunk;
1485 else
1486 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1487 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1488 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1489
1490 /*
1491 * Allocate and initialize the allocatore instance.
1492 */
1493 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1494#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1495 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1496 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1497 cbNeeded += cbBitmap * cMaxChunks;
1498 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1499 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1500#endif
1501#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1502 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1503 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1504#endif
1505 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1506 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1507 VERR_NO_MEMORY);
1508 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1509 pExecMemAllocator->cbChunk = cbChunk;
1510 pExecMemAllocator->cMaxChunks = cMaxChunks;
1511 pExecMemAllocator->cChunks = 0;
1512 pExecMemAllocator->idxChunkHint = 0;
1513 pExecMemAllocator->cAllocations = 0;
1514 pExecMemAllocator->cbTotal = 0;
1515 pExecMemAllocator->cbFree = 0;
1516 pExecMemAllocator->cbAllocated = 0;
1517#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1518 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1519 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1520 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1521 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1522#endif
1523#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1524 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1525#endif
1526 for (uint32_t i = 0; i < cMaxChunks; i++)
1527 {
1528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1529 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1530 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1531#else
1532 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1533#endif
1534 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1535#ifdef IN_RING0
1536 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1537#else
1538 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1539#endif
1540 }
1541 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1542
1543 /*
1544 * Do the initial allocations.
1545 */
1546 while (cbInitial < (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1547 {
1548 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1549 AssertLogRelRCReturn(rc, rc);
1550 }
1551
1552 pExecMemAllocator->idxChunkHint = 0;
1553
1554 return VINF_SUCCESS;
1555}
1556
1557
1558/*********************************************************************************************************************************
1559* Native Recompilation *
1560*********************************************************************************************************************************/
1561
1562
1563/**
1564 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1565 */
1566IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1567{
1568 pVCpu->iem.s.cInstructions += idxInstr;
1569 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1570}
1571
1572
1573/**
1574 * Used by TB code when it wants to raise a \#GP(0).
1575 */
1576IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1577{
1578#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1579 pVCpu->iem.s.idxTbCurInstr = idxInstr;
1580#else
1581 RT_NOREF(idxInstr);
1582#endif
1583 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1584#ifndef _MSC_VER
1585 return VINF_IEM_RAISED_XCPT; /* not reached */
1586#endif
1587}
1588
1589
1590/*********************************************************************************************************************************
1591* Helpers: Segmented memory fetches and stores. *
1592*********************************************************************************************************************************/
1593
1594/**
1595 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1596 */
1597IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1598{
1599 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1600}
1601
1602
1603/**
1604 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1605 * to 16 bits.
1606 */
1607IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1608{
1609 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1610}
1611
1612
1613/**
1614 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1615 * to 32 bits.
1616 */
1617IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1618{
1619 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1620}
1621
1622/**
1623 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1624 * to 64 bits.
1625 */
1626IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1627{
1628 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1629}
1630
1631
1632/**
1633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1634 */
1635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1636{
1637 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1638}
1639
1640
1641/**
1642 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1643 * to 32 bits.
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1646{
1647 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1648}
1649
1650
1651/**
1652 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1653 * to 64 bits.
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1656{
1657 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1658}
1659
1660
1661/**
1662 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1663 */
1664IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1665{
1666 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1667}
1668
1669
1670/**
1671 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1672 * to 64 bits.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1675{
1676 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1677}
1678
1679
1680/**
1681 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1682 */
1683IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1684{
1685 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1686}
1687
1688
1689/**
1690 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1691 */
1692IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1693{
1694 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1695}
1696
1697
1698/**
1699 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1702{
1703 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1704}
1705
1706
1707/**
1708 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1709 */
1710IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1711{
1712 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1713}
1714
1715
1716/**
1717 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1718 */
1719IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1720{
1721 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1722}
1723
1724
1725
1726/*********************************************************************************************************************************
1727* Helpers: Flat memory fetches and stores. *
1728*********************************************************************************************************************************/
1729
1730/**
1731 * Used by TB code to load unsigned 8-bit data w/ flat address.
1732 * @note Zero extending the value to 64-bit to simplify assembly.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1735{
1736 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1737}
1738
1739
1740/**
1741 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1742 * to 16 bits.
1743 * @note Zero extending the value to 64-bit to simplify assembly.
1744 */
1745IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1746{
1747 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1753 * to 32 bits.
1754 * @note Zero extending the value to 64-bit to simplify assembly.
1755 */
1756IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1757{
1758 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1759}
1760
1761
1762/**
1763 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1764 * to 64 bits.
1765 */
1766IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1767{
1768 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1769}
1770
1771
1772/**
1773 * Used by TB code to load unsigned 16-bit data w/ flat address.
1774 * @note Zero extending the value to 64-bit to simplify assembly.
1775 */
1776IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1777{
1778 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1779}
1780
1781
1782/**
1783 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1784 * to 32 bits.
1785 * @note Zero extending the value to 64-bit to simplify assembly.
1786 */
1787IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1788{
1789 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1790}
1791
1792
1793/**
1794 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1795 * to 64 bits.
1796 * @note Zero extending the value to 64-bit to simplify assembly.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1799{
1800 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1801}
1802
1803
1804/**
1805 * Used by TB code to load unsigned 32-bit data w/ flat address.
1806 * @note Zero extending the value to 64-bit to simplify assembly.
1807 */
1808IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1809{
1810 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1811}
1812
1813
1814/**
1815 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1816 * to 64 bits.
1817 * @note Zero extending the value to 64-bit to simplify assembly.
1818 */
1819IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1820{
1821 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1822}
1823
1824
1825/**
1826 * Used by TB code to load unsigned 64-bit data w/ flat address.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1829{
1830 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1831}
1832
1833
1834/**
1835 * Used by TB code to store unsigned 8-bit data w/ flat address.
1836 */
1837IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1838{
1839 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1840}
1841
1842
1843/**
1844 * Used by TB code to store unsigned 16-bit data w/ flat address.
1845 */
1846IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1847{
1848 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1849}
1850
1851
1852/**
1853 * Used by TB code to store unsigned 32-bit data w/ flat address.
1854 */
1855IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1856{
1857 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 64-bit data w/ flat address.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1865{
1866 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1867}
1868
1869
1870/*********************************************************************************************************************************
1871* Helpers: Segmented memory mapping. *
1872*********************************************************************************************************************************/
1873
1874/**
1875 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1878 RTGCPTR GCPtrMem, uint8_t iSegReg))
1879{
1880 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RwSafeJmp */
1881}
1882
1883
1884/**
1885 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1886 */
1887IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1888 RTGCPTR GCPtrMem, uint8_t iSegReg))
1889{
1890 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8WoSafeJmp */
1891}
1892
1893
1894/**
1895 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1896 */
1897IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1898 RTGCPTR GCPtrMem, uint8_t iSegReg))
1899{
1900 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RoSafeJmp */
1901}
1902
1903
1904/**
1905 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1908 RTGCPTR GCPtrMem, uint8_t iSegReg))
1909{
1910 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RwSafeJmp */
1911}
1912
1913
1914/**
1915 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1916 */
1917IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1918 RTGCPTR GCPtrMem, uint8_t iSegReg))
1919{
1920 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16WoSafeJmp */
1921}
1922
1923
1924/**
1925 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1926 */
1927IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1928 RTGCPTR GCPtrMem, uint8_t iSegReg))
1929{
1930 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RoSafeJmp */
1931}
1932
1933
1934/**
1935 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1936 */
1937IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1938 RTGCPTR GCPtrMem, uint8_t iSegReg))
1939{
1940 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RwSafeJmp */
1941}
1942
1943
1944/**
1945 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1946 */
1947IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1948 RTGCPTR GCPtrMem, uint8_t iSegReg))
1949{
1950 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32WoSafeJmp */
1951}
1952
1953
1954/**
1955 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1956 */
1957IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1958 RTGCPTR GCPtrMem, uint8_t iSegReg))
1959{
1960 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RoSafeJmp */
1961}
1962
1963
1964/**
1965 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1966 */
1967IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1968 RTGCPTR GCPtrMem, uint8_t iSegReg))
1969{
1970 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RwSafeJmp */
1971}
1972
1973
1974/**
1975 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1976 */
1977IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1978 RTGCPTR GCPtrMem, uint8_t iSegReg))
1979{
1980 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64WoSafeJmp */
1981}
1982
1983
1984/**
1985 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1986 */
1987IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1988 RTGCPTR GCPtrMem, uint8_t iSegReg))
1989{
1990 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RoSafeJmp */
1991}
1992
1993
1994/**
1995 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1996 */
1997IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1998 RTGCPTR GCPtrMem, uint8_t iSegReg))
1999{
2000 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataR80WoSafeJmp */
2001}
2002
2003
2004/**
2005 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2006 */
2007IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2008 RTGCPTR GCPtrMem, uint8_t iSegReg))
2009{
2010 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataD80WoSafeJmp */
2011}
2012
2013
2014/**
2015 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2016 */
2017IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2018 RTGCPTR GCPtrMem, uint8_t iSegReg))
2019{
2020 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RwSafeJmp */
2021}
2022
2023
2024/**
2025 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2026 */
2027IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2028 RTGCPTR GCPtrMem, uint8_t iSegReg))
2029{
2030 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128WoSafeJmp */
2031}
2032
2033
2034/**
2035 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2036 */
2037IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2038 RTGCPTR GCPtrMem, uint8_t iSegReg))
2039{
2040 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RoSafeJmp */
2041}
2042
2043
2044/*********************************************************************************************************************************
2045* Helpers: Flat memory mapping. *
2046*********************************************************************************************************************************/
2047
2048/**
2049 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2050 */
2051IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2052{
2053 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RwSafeJmp */
2054}
2055
2056
2057/**
2058 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2059 */
2060IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2061{
2062 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8WoSafeJmp */
2063}
2064
2065
2066/**
2067 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2068 */
2069IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2070{
2071 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RoSafeJmp */
2072}
2073
2074
2075/**
2076 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2079{
2080 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RwSafeJmp */
2081}
2082
2083
2084/**
2085 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2086 */
2087IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2088{
2089 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16WoSafeJmp */
2090}
2091
2092
2093/**
2094 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2095 */
2096IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2097{
2098 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RoSafeJmp */
2099}
2100
2101
2102/**
2103 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2104 */
2105IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2106{
2107 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RwSafeJmp */
2108}
2109
2110
2111/**
2112 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2113 */
2114IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2115{
2116 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32WoSafeJmp */
2117}
2118
2119
2120/**
2121 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2122 */
2123IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2124{
2125 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RoSafeJmp */
2126}
2127
2128
2129/**
2130 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2131 */
2132IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2133{
2134 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RwSafeJmp */
2135}
2136
2137
2138/**
2139 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2140 */
2141IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2142{
2143 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64WoSafeJmp */
2144}
2145
2146
2147/**
2148 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2149 */
2150IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2151{
2152 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RoSafeJmp */
2153}
2154
2155
2156/**
2157 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2160{
2161 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataR80WoSafeJmp */
2162}
2163
2164
2165/**
2166 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2167 */
2168IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2169{
2170 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataD80WoSafeJmp */
2171}
2172
2173
2174/**
2175 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2176 */
2177IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2178{
2179 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RwSafeJmp */
2180}
2181
2182
2183/**
2184 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2187{
2188 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128WoSafeJmp */
2189}
2190
2191
2192/**
2193 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2194 */
2195IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2196{
2197 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RoSafeJmp */
2198}
2199
2200
2201/*********************************************************************************************************************************
2202* Helpers: Commit, rollback & unmap *
2203*********************************************************************************************************************************/
2204
2205/**
2206 * Used by TB code to commit and unmap a read-write memory mapping.
2207 */
2208IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2209{
2210 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2211}
2212
2213
2214/**
2215 * Used by TB code to commit and unmap a write-only memory mapping.
2216 */
2217IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2218{
2219 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2220}
2221
2222
2223/**
2224 * Used by TB code to commit and unmap a read-only memory mapping.
2225 */
2226IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2227{
2228 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2229}
2230
2231
2232/**
2233 * Reinitializes the native recompiler state.
2234 *
2235 * Called before starting a new recompile job.
2236 */
2237static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2238{
2239 pReNative->cLabels = 0;
2240 pReNative->bmLabelTypes = 0;
2241 pReNative->cFixups = 0;
2242#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2243 pReNative->pDbgInfo->cEntries = 0;
2244#endif
2245 pReNative->pTbOrg = pTb;
2246 pReNative->cCondDepth = 0;
2247 pReNative->uCondSeqNo = 0;
2248 pReNative->uCheckIrqSeqNo = 0;
2249 pReNative->uTlbSeqNo = 0;
2250
2251 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2252#if IEMNATIVE_HST_GREG_COUNT < 32
2253 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2254#endif
2255 ;
2256 pReNative->Core.bmHstRegsWithGstShadow = 0;
2257 pReNative->Core.bmGstRegShadows = 0;
2258 pReNative->Core.bmVars = 0;
2259 pReNative->Core.bmStack = 0;
2260 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2261 pReNative->Core.u64ArgVars = UINT64_MAX;
2262
2263 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 6);
2264 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2265 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2266 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2267 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2268 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2269 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2270
2271 /* Full host register reinit: */
2272 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2273 {
2274 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2275 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2276 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2277 }
2278
2279 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2280 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2281#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2282 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2283#endif
2284#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2285 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2286#endif
2287 );
2288 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2289 {
2290 fRegs &= ~RT_BIT_32(idxReg);
2291 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_FixedReserved;
2292 }
2293
2294 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2295#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2296 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2297#endif
2298#ifdef IEMNATIVE_REG_FIXED_TMP0
2299 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2300#endif
2301 return pReNative;
2302}
2303
2304
2305/**
2306 * Allocates and initializes the native recompiler state.
2307 *
2308 * This is called the first time an EMT wants to recompile something.
2309 *
2310 * @returns Pointer to the new recompiler state.
2311 * @param pVCpu The cross context virtual CPU structure of the calling
2312 * thread.
2313 * @param pTb The TB that's about to be recompiled.
2314 * @thread EMT(pVCpu)
2315 */
2316static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2317{
2318 VMCPU_ASSERT_EMT(pVCpu);
2319
2320 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2321 AssertReturn(pReNative, NULL);
2322
2323 /*
2324 * Try allocate all the buffers and stuff we need.
2325 */
2326 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2327 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2328 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2329#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2330 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2331#endif
2332 if (RT_LIKELY( pReNative->pInstrBuf
2333 && pReNative->paLabels
2334 && pReNative->paFixups)
2335#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2336 && pReNative->pDbgInfo
2337#endif
2338 )
2339 {
2340 /*
2341 * Set the buffer & array sizes on success.
2342 */
2343 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2344 pReNative->cLabelsAlloc = _8K;
2345 pReNative->cFixupsAlloc = _16K;
2346#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2347 pReNative->cDbgInfoAlloc = _16K;
2348#endif
2349
2350 /*
2351 * Done, just need to save it and reinit it.
2352 */
2353 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2354 return iemNativeReInit(pReNative, pTb);
2355 }
2356
2357 /*
2358 * Failed. Cleanup and return.
2359 */
2360 AssertFailed();
2361 RTMemFree(pReNative->pInstrBuf);
2362 RTMemFree(pReNative->paLabels);
2363 RTMemFree(pReNative->paFixups);
2364#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2365 RTMemFree(pReNative->pDbgInfo);
2366#endif
2367 RTMemFree(pReNative);
2368 return NULL;
2369}
2370
2371
2372/**
2373 * Creates a label
2374 *
2375 * If the label does not yet have a defined position,
2376 * call iemNativeLabelDefine() later to set it.
2377 *
2378 * @returns Label ID. Throws VBox status code on failure, so no need to check
2379 * the return value.
2380 * @param pReNative The native recompile state.
2381 * @param enmType The label type.
2382 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2383 * label is not yet defined (default).
2384 * @param uData Data associated with the lable. Only applicable to
2385 * certain type of labels. Default is zero.
2386 */
2387DECL_HIDDEN_THROW(uint32_t)
2388iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2389 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2390{
2391 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2392
2393 /*
2394 * Locate existing label definition.
2395 *
2396 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2397 * and uData is zero.
2398 */
2399 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2400 uint32_t const cLabels = pReNative->cLabels;
2401 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2402#ifndef VBOX_STRICT
2403 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2404 && offWhere == UINT32_MAX
2405 && uData == 0
2406#endif
2407 )
2408 {
2409#ifndef VBOX_STRICT
2410 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2411 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2412 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2413 if (idxLabel < pReNative->cLabels)
2414 return idxLabel;
2415#else
2416 for (uint32_t i = 0; i < cLabels; i++)
2417 if ( paLabels[i].enmType == enmType
2418 && paLabels[i].uData == uData)
2419 {
2420 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2421 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2422 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2423 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2425 return i;
2426 }
2427 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2428 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2429#endif
2430 }
2431
2432 /*
2433 * Make sure we've got room for another label.
2434 */
2435 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2436 { /* likely */ }
2437 else
2438 {
2439 uint32_t cNew = pReNative->cLabelsAlloc;
2440 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2441 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2442 cNew *= 2;
2443 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2444 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2445 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2446 pReNative->paLabels = paLabels;
2447 pReNative->cLabelsAlloc = cNew;
2448 }
2449
2450 /*
2451 * Define a new label.
2452 */
2453 paLabels[cLabels].off = offWhere;
2454 paLabels[cLabels].enmType = enmType;
2455 paLabels[cLabels].uData = uData;
2456 pReNative->cLabels = cLabels + 1;
2457
2458 Assert((unsigned)enmType < 64);
2459 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2460
2461 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2462 {
2463 Assert(uData == 0);
2464 pReNative->aidxUniqueLabels[enmType] = cLabels;
2465 }
2466
2467 if (offWhere != UINT32_MAX)
2468 {
2469#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2470 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2471 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2472#endif
2473 }
2474 return cLabels;
2475}
2476
2477
2478/**
2479 * Defines the location of an existing label.
2480 *
2481 * @param pReNative The native recompile state.
2482 * @param idxLabel The label to define.
2483 * @param offWhere The position.
2484 */
2485DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2486{
2487 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2488 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2489 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2490 pLabel->off = offWhere;
2491#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2492 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2493 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2494#endif
2495}
2496
2497
2498/**
2499 * Looks up a lable.
2500 *
2501 * @returns Label ID if found, UINT32_MAX if not.
2502 */
2503static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2504 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2505{
2506 Assert((unsigned)enmType < 64);
2507 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2508 {
2509 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2510 return pReNative->aidxUniqueLabels[enmType];
2511
2512 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2513 uint32_t const cLabels = pReNative->cLabels;
2514 for (uint32_t i = 0; i < cLabels; i++)
2515 if ( paLabels[i].enmType == enmType
2516 && paLabels[i].uData == uData
2517 && ( paLabels[i].off == offWhere
2518 || offWhere == UINT32_MAX
2519 || paLabels[i].off == UINT32_MAX))
2520 return i;
2521 }
2522 return UINT32_MAX;
2523}
2524
2525
2526/**
2527 * Adds a fixup.
2528 *
2529 * @throws VBox status code (int) on failure.
2530 * @param pReNative The native recompile state.
2531 * @param offWhere The instruction offset of the fixup location.
2532 * @param idxLabel The target label ID for the fixup.
2533 * @param enmType The fixup type.
2534 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2535 */
2536DECL_HIDDEN_THROW(void)
2537iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2538 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2539{
2540 Assert(idxLabel <= UINT16_MAX);
2541 Assert((unsigned)enmType <= UINT8_MAX);
2542
2543 /*
2544 * Make sure we've room.
2545 */
2546 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2547 uint32_t const cFixups = pReNative->cFixups;
2548 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2549 { /* likely */ }
2550 else
2551 {
2552 uint32_t cNew = pReNative->cFixupsAlloc;
2553 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2554 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2555 cNew *= 2;
2556 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2557 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2558 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2559 pReNative->paFixups = paFixups;
2560 pReNative->cFixupsAlloc = cNew;
2561 }
2562
2563 /*
2564 * Add the fixup.
2565 */
2566 paFixups[cFixups].off = offWhere;
2567 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2568 paFixups[cFixups].enmType = enmType;
2569 paFixups[cFixups].offAddend = offAddend;
2570 pReNative->cFixups = cFixups + 1;
2571}
2572
2573
2574/**
2575 * Slow code path for iemNativeInstrBufEnsure.
2576 */
2577DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2578{
2579 /* Double the buffer size till we meet the request. */
2580 uint32_t cNew = pReNative->cInstrBufAlloc;
2581 AssertReturn(cNew > 0, NULL);
2582 do
2583 cNew *= 2;
2584 while (cNew < off + cInstrReq);
2585
2586 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2587#ifdef RT_ARCH_ARM64
2588 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2589#else
2590 uint32_t const cbMaxInstrBuf = _2M;
2591#endif
2592 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2593
2594 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2595 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2596
2597 pReNative->cInstrBufAlloc = cNew;
2598 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2599}
2600
2601#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2602
2603/**
2604 * Grows the static debug info array used during recompilation.
2605 *
2606 * @returns Pointer to the new debug info block; throws VBox status code on
2607 * failure, so no need to check the return value.
2608 */
2609DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2610{
2611 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2612 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2613 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2614 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2615 pReNative->pDbgInfo = pDbgInfo;
2616 pReNative->cDbgInfoAlloc = cNew;
2617 return pDbgInfo;
2618}
2619
2620
2621/**
2622 * Adds a new debug info uninitialized entry, returning the pointer to it.
2623 */
2624DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2625{
2626 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2627 { /* likely */ }
2628 else
2629 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2630 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2631}
2632
2633
2634/**
2635 * Debug Info: Adds a native offset record, if necessary.
2636 */
2637static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2638{
2639 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2640
2641 /*
2642 * Search backwards to see if we've got a similar record already.
2643 */
2644 uint32_t idx = pDbgInfo->cEntries;
2645 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2646 while (idx-- > idxStop)
2647 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2648 {
2649 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2650 return;
2651 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2652 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2653 break;
2654 }
2655
2656 /*
2657 * Add it.
2658 */
2659 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2660 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2661 pEntry->NativeOffset.offNative = off;
2662}
2663
2664
2665/**
2666 * Debug Info: Record info about a label.
2667 */
2668static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2669{
2670 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2671 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2672 pEntry->Label.uUnused = 0;
2673 pEntry->Label.enmLabel = (uint8_t)enmType;
2674 pEntry->Label.uData = uData;
2675}
2676
2677
2678/**
2679 * Debug Info: Record info about a threaded call.
2680 */
2681static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2682{
2683 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2684 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2685 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2686 pEntry->ThreadedCall.uUnused = 0;
2687 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2688}
2689
2690
2691/**
2692 * Debug Info: Record info about a new guest instruction.
2693 */
2694static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2695{
2696 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2697 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2698 pEntry->GuestInstruction.uUnused = 0;
2699 pEntry->GuestInstruction.fExec = fExec;
2700}
2701
2702
2703/**
2704 * Debug Info: Record info about guest register shadowing.
2705 */
2706static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2707 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2708{
2709 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2710 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2711 pEntry->GuestRegShadowing.uUnused = 0;
2712 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2713 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2714 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2715}
2716
2717#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2718
2719
2720/*********************************************************************************************************************************
2721* Register Allocator *
2722*********************************************************************************************************************************/
2723
2724/**
2725 * Register parameter indexes (indexed by argument number).
2726 */
2727DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2728{
2729 IEMNATIVE_CALL_ARG0_GREG,
2730 IEMNATIVE_CALL_ARG1_GREG,
2731 IEMNATIVE_CALL_ARG2_GREG,
2732 IEMNATIVE_CALL_ARG3_GREG,
2733#if defined(IEMNATIVE_CALL_ARG4_GREG)
2734 IEMNATIVE_CALL_ARG4_GREG,
2735# if defined(IEMNATIVE_CALL_ARG5_GREG)
2736 IEMNATIVE_CALL_ARG5_GREG,
2737# if defined(IEMNATIVE_CALL_ARG6_GREG)
2738 IEMNATIVE_CALL_ARG6_GREG,
2739# if defined(IEMNATIVE_CALL_ARG7_GREG)
2740 IEMNATIVE_CALL_ARG7_GREG,
2741# endif
2742# endif
2743# endif
2744#endif
2745};
2746
2747/**
2748 * Call register masks indexed by argument count.
2749 */
2750DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2751{
2752 0,
2753 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2754 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2755 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2756 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2757 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2758#if defined(IEMNATIVE_CALL_ARG4_GREG)
2759 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2760 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2761# if defined(IEMNATIVE_CALL_ARG5_GREG)
2762 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2763 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2764# if defined(IEMNATIVE_CALL_ARG6_GREG)
2765 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2766 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2767 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2768# if defined(IEMNATIVE_CALL_ARG7_GREG)
2769 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2770 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2771 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2772# endif
2773# endif
2774# endif
2775#endif
2776};
2777
2778#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2779/**
2780 * BP offset of the stack argument slots.
2781 *
2782 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2783 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2784 */
2785DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2786{
2787 IEMNATIVE_FP_OFF_STACK_ARG0,
2788# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2789 IEMNATIVE_FP_OFF_STACK_ARG1,
2790# endif
2791# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2792 IEMNATIVE_FP_OFF_STACK_ARG2,
2793# endif
2794# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2795 IEMNATIVE_FP_OFF_STACK_ARG3,
2796# endif
2797};
2798AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2799#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2800
2801/**
2802 * Info about shadowed guest register values.
2803 * @see IEMNATIVEGSTREG
2804 */
2805static struct
2806{
2807 /** Offset in VMCPU. */
2808 uint32_t off;
2809 /** The field size. */
2810 uint8_t cb;
2811 /** Name (for logging). */
2812 const char *pszName;
2813} const g_aGstShadowInfo[] =
2814{
2815#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2816 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2817 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2818 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2819 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2820 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2821 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2822 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2823 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2824 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2825 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2826 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2827 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2828 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2829 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2830 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2831 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2832 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2833 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2834 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2835 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2836 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2837 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2838 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2839 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2840 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2841 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2842 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2843 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2844 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2845 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2846 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2847 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2848 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2849 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2850 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2851 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2852#undef CPUMCTX_OFF_AND_SIZE
2853};
2854AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2855
2856
2857/** Host CPU general purpose register names. */
2858DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2859{
2860#ifdef RT_ARCH_AMD64
2861 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2862#elif RT_ARCH_ARM64
2863 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2864 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2865#else
2866# error "port me"
2867#endif
2868};
2869
2870
2871DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2872 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2873{
2874 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2875
2876 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2877 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2878 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2879 return (uint8_t)idxReg;
2880}
2881
2882
2883/**
2884 * Tries to locate a suitable register in the given register mask.
2885 *
2886 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2887 * failed.
2888 *
2889 * @returns Host register number on success, returns UINT8_MAX on failure.
2890 */
2891static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2892{
2893 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2894 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2895 if (fRegs)
2896 {
2897 /** @todo pick better here: */
2898 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2899
2900 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2901 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2902 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2903 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2904
2905 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2906 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2907 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2908 return idxReg;
2909 }
2910 return UINT8_MAX;
2911}
2912
2913
2914/**
2915 * Locate a register, possibly freeing one up.
2916 *
2917 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2918 * failed.
2919 *
2920 * @returns Host register number on success. Returns UINT8_MAX if no registers
2921 * found, the caller is supposed to deal with this and raise a
2922 * allocation type specific status code (if desired).
2923 *
2924 * @throws VBox status code if we're run into trouble spilling a variable of
2925 * recording debug info. Does NOT throw anything if we're out of
2926 * registers, though.
2927 */
2928static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2929 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2930{
2931 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2932 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2933
2934 /*
2935 * Try a freed register that's shadowing a guest register
2936 */
2937 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2938 if (fRegs)
2939 {
2940 unsigned const idxReg = (fPreferVolatile
2941 ? ASMBitFirstSetU32(fRegs)
2942 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2943 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2944 - 1;
2945
2946 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2947 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2948 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2949 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2950
2951 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2952 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2953 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2954 return idxReg;
2955 }
2956
2957 /*
2958 * Try free up a variable that's in a register.
2959 *
2960 * We do two rounds here, first evacuating variables we don't need to be
2961 * saved on the stack, then in the second round move things to the stack.
2962 */
2963 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2964 {
2965 uint32_t fVars = pReNative->Core.bmVars;
2966 while (fVars)
2967 {
2968 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2969 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2970/** @todo Prevent active variables from changing here... */
2971 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2972 && (RT_BIT_32(idxReg) & fRegMask)
2973 && ( iLoop == 0
2974 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2975 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2976 {
2977 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2978 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2979 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2980 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2981 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2982 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2983
2984 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2985 {
2986 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
2987 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2988 }
2989
2990 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2991 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2992 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2993 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2994 return idxReg;
2995 }
2996 fVars &= ~RT_BIT_32(idxVar);
2997 }
2998 }
2999
3000 return UINT8_MAX;
3001}
3002
3003
3004/**
3005 * Reassigns a variable to a different register specified by the caller.
3006 *
3007 * @returns The new code buffer position.
3008 * @param pReNative The native recompile state.
3009 * @param off The current code buffer position.
3010 * @param idxVar The variable index.
3011 * @param idxRegOld The old host register number.
3012 * @param idxRegNew The new host register number.
3013 * @param pszCaller The caller for logging.
3014 */
3015static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3016 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3017{
3018 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3019 RT_NOREF(pszCaller);
3020
3021 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3022
3023 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3024 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3025 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3026 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3027
3028 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3029 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3030 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3031 if (fGstRegShadows)
3032 {
3033 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
3034 while (fGstRegShadows)
3035 {
3036 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3037 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3038
3039 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3040 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3041 }
3042 }
3043
3044 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3045 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3046 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3047 return off;
3048}
3049
3050
3051/**
3052 * Moves a variable to a different register or spills it onto the stack.
3053 *
3054 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3055 * kinds can easily be recreated if needed later.
3056 *
3057 * @returns The new code buffer position.
3058 * @param pReNative The native recompile state.
3059 * @param off The current code buffer position.
3060 * @param idxVar The variable index.
3061 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3062 * call-volatile registers.
3063 */
3064static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3065 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3066{
3067 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
3068 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3069
3070 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3071 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3072 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3073 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3074 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3075 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3076 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3077 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3078 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3079
3080
3081 /** @todo Add statistics on this.*/
3082 /** @todo Implement basic variable liveness analysis (python) so variables
3083 * can be freed immediately once no longer used. This has the potential to
3084 * be trashing registers and stack for dead variables. */
3085
3086 /*
3087 * First try move it to a different register, as that's cheaper.
3088 */
3089 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3090 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3091 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3092 if (fRegs)
3093 {
3094 /* Avoid using shadow registers, if possible. */
3095 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3096 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3097 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3098 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3099 }
3100
3101 /*
3102 * Otherwise we must spill the register onto the stack.
3103 */
3104 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3105 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3106 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3107 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3108
3109 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3110 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3111 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3112 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3113 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3114 return off;
3115}
3116
3117
3118/**
3119 * Allocates a temporary host general purpose register.
3120 *
3121 * This may emit code to save register content onto the stack in order to free
3122 * up a register.
3123 *
3124 * @returns The host register number; throws VBox status code on failure,
3125 * so no need to check the return value.
3126 * @param pReNative The native recompile state.
3127 * @param poff Pointer to the variable with the code buffer position.
3128 * This will be update if we need to move a variable from
3129 * register to stack in order to satisfy the request.
3130 * @param fPreferVolatile Wheter to prefer volatile over non-volatile
3131 * registers (@c true, default) or the other way around
3132 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3133 */
3134DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3135{
3136 /*
3137 * Try find a completely unused register, preferably a call-volatile one.
3138 */
3139 uint8_t idxReg;
3140 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3141 & ~pReNative->Core.bmHstRegsWithGstShadow
3142 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3143 if (fRegs)
3144 {
3145 if (fPreferVolatile)
3146 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3147 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3148 else
3149 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3150 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3151 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3152 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3153 }
3154 else
3155 {
3156 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3157 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3158 }
3159 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3160}
3161
3162
3163/**
3164 * Allocates a temporary register for loading an immediate value into.
3165 *
3166 * This will emit code to load the immediate, unless there happens to be an
3167 * unused register with the value already loaded.
3168 *
3169 * The caller will not modify the returned register, it must be considered
3170 * read-only. Free using iemNativeRegFreeTmpImm.
3171 *
3172 * @returns The host register number; throws VBox status code on failure, so no
3173 * need to check the return value.
3174 * @param pReNative The native recompile state.
3175 * @param poff Pointer to the variable with the code buffer position.
3176 * @param uImm The immediate value that the register must hold upon
3177 * return.
3178 * @param fPreferVolatile Wheter to prefer volatile over non-volatile
3179 * registers (@c true, default) or the other way around
3180 * (@c false).
3181 *
3182 * @note Reusing immediate values has not been implemented yet.
3183 */
3184DECL_HIDDEN_THROW(uint8_t)
3185iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3186{
3187 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3188 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3189 return idxReg;
3190}
3191
3192
3193/**
3194 * Marks host register @a idxHstReg as containing a shadow copy of guest
3195 * register @a enmGstReg.
3196 *
3197 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
3198 * host register before calling.
3199 */
3200DECL_FORCE_INLINE(void)
3201iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3202{
3203 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3204 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3205
3206 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3207 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
3208 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3209 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3210#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3211 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3212 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3213#else
3214 RT_NOREF(off);
3215#endif
3216}
3217
3218
3219/**
3220 * Clear any guest register shadow claims from @a idxHstReg.
3221 *
3222 * The register does not need to be shadowing any guest registers.
3223 */
3224DECL_FORCE_INLINE(void)
3225iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3226{
3227 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3228 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3229 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3230 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3231 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3232
3233#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3234 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3235 if (fGstRegs)
3236 {
3237 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3238 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3239 while (fGstRegs)
3240 {
3241 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3242 fGstRegs &= ~RT_BIT_64(iGstReg);
3243 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3244 }
3245 }
3246#else
3247 RT_NOREF(off);
3248#endif
3249
3250 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3251 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3252 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3253}
3254
3255
3256/**
3257 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3258 * and global overview flags.
3259 */
3260DECL_FORCE_INLINE(void)
3261iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3262{
3263 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3264 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3265 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3266 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3267 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3268 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3269 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3270
3271#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3272 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3273 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3274#else
3275 RT_NOREF(off);
3276#endif
3277
3278 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3279 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3280 if (!fGstRegShadowsNew)
3281 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3282 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3283}
3284
3285
3286/**
3287 * Clear any guest register shadow claim for @a enmGstReg.
3288 */
3289DECL_FORCE_INLINE(void)
3290iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3291{
3292 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3293 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3294 {
3295 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3296 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3297 }
3298}
3299
3300
3301/**
3302 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3303 * as the new shadow of it.
3304 */
3305DECL_FORCE_INLINE(void)
3306iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3307 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3308{
3309 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3310 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3311 {
3312 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3313 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3314 return;
3315 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3316 }
3317 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3318}
3319
3320
3321/**
3322 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3323 * to @a idxRegTo.
3324 */
3325DECL_FORCE_INLINE(void)
3326iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3327 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3328{
3329 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3330 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3331 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3332 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3333 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3334 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3335 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3336 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3337
3338 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3339 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3340 if (!fGstRegShadowsFrom)
3341 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3342 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3343 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3344 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3345#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3346 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3347 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3348#else
3349 RT_NOREF(off);
3350#endif
3351}
3352
3353
3354/**
3355 * Allocates a temporary host general purpose register for keeping a guest
3356 * register value.
3357 *
3358 * Since we may already have a register holding the guest register value,
3359 * code will be emitted to do the loading if that's not the case. Code may also
3360 * be emitted if we have to free up a register to satify the request.
3361 *
3362 * @returns The host register number; throws VBox status code on failure, so no
3363 * need to check the return value.
3364 * @param pReNative The native recompile state.
3365 * @param poff Pointer to the variable with the code buffer
3366 * position. This will be update if we need to move a
3367 * variable from register to stack in order to satisfy
3368 * the request.
3369 * @param enmGstReg The guest register that will is to be updated.
3370 * @param enmIntendedUse How the caller will be using the host register.
3371 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3372 */
3373DECL_HIDDEN_THROW(uint8_t)
3374iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3375 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
3376{
3377 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3378#ifdef LOG_ENABLED
3379 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3380#endif
3381
3382 /*
3383 * First check if the guest register value is already in a host register.
3384 */
3385 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3386 {
3387 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3388 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3389 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3390 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3391
3392 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3393 {
3394 /*
3395 * If the register will trash the guest shadow copy, try find a
3396 * completely unused register we can use instead. If that fails,
3397 * we need to disassociate the host reg from the guest reg.
3398 */
3399 /** @todo would be nice to know if preserving the register is in any way helpful. */
3400 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3401 && ( ~pReNative->Core.bmHstRegs
3402 & ~pReNative->Core.bmHstRegsWithGstShadow
3403 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3404 {
3405 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
3406
3407 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3408
3409 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3410 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3411 g_apszIemNativeHstRegNames[idxRegNew]));
3412 idxReg = idxRegNew;
3413 }
3414 else
3415 {
3416 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3417 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3418 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3419 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3420 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3421 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3422 else
3423 {
3424 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3425 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3426 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3427 }
3428 }
3429 }
3430 else
3431 {
3432 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3433 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3434 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3435 idxReg, s_pszIntendedUse[enmIntendedUse]));
3436
3437 /*
3438 * Allocate a new register, copy the value and, if updating, the
3439 * guest shadow copy assignment to the new register.
3440 */
3441 /** @todo share register for readonly access. */
3442 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3443
3444 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3445 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3446
3447 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3448 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3449 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3450 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3451 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3452 else
3453 {
3454 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3455 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3456 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3457 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3458 }
3459 idxReg = idxRegNew;
3460 }
3461
3462#ifdef VBOX_STRICT
3463 /* Strict builds: Check that the value is correct. */
3464 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3465#endif
3466
3467 return idxReg;
3468 }
3469
3470 /*
3471 * Allocate a new register, load it with the guest value and designate it as a copy of the
3472 */
3473 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3474
3475 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3476 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3477
3478 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3479 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3480 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3481 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3482
3483 return idxRegNew;
3484}
3485
3486
3487/**
3488 * Allocates a temporary host general purpose register that already holds the
3489 * given guest register value.
3490 *
3491 * The use case for this function is places where the shadowing state cannot be
3492 * modified due to branching and such. This will fail if the we don't have a
3493 * current shadow copy handy or if it's incompatible. The only code that will
3494 * be emitted here is value checking code in strict builds.
3495 *
3496 * The intended use can only be readonly!
3497 *
3498 * @returns The host register number, UINT8_MAX if not present.
3499 * @param pReNative The native recompile state.
3500 * @param poff Pointer to the instruction buffer offset.
3501 * Will be updated in strict builds if a register is
3502 * found.
3503 * @param enmGstReg The guest register that will is to be updated.
3504 * @note In strict builds, this may throw instruction buffer growth failures.
3505 * Non-strict builds will not throw anything.
3506 * @sa iemNativeRegAllocTmpForGuestReg
3507 */
3508DECL_HIDDEN_THROW(uint8_t)
3509iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3510{
3511 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3512
3513 /*
3514 * First check if the guest register value is already in a host register.
3515 */
3516 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3517 {
3518 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3519 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3520 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3521 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3522
3523 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3524 {
3525 /*
3526 * We only do readonly use here, so easy compared to the other
3527 * variant of this code.
3528 */
3529 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3530 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3531 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3532 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3533 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3534
3535#ifdef VBOX_STRICT
3536 /* Strict builds: Check that the value is correct. */
3537 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3538#else
3539 RT_NOREF(poff);
3540#endif
3541 return idxReg;
3542 }
3543 }
3544
3545 return UINT8_MAX;
3546}
3547
3548
3549DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
3550
3551
3552/**
3553 * Allocates argument registers for a function call.
3554 *
3555 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3556 * need to check the return value.
3557 * @param pReNative The native recompile state.
3558 * @param off The current code buffer offset.
3559 * @param cArgs The number of arguments the function call takes.
3560 */
3561DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3562{
3563 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3564 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3565 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3566 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3567
3568 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3569 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3570 else if (cArgs == 0)
3571 return true;
3572
3573 /*
3574 * Do we get luck and all register are free and not shadowing anything?
3575 */
3576 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3577 for (uint32_t i = 0; i < cArgs; i++)
3578 {
3579 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3580 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3581 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3582 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3583 }
3584 /*
3585 * Okay, not lucky so we have to free up the registers.
3586 */
3587 else
3588 for (uint32_t i = 0; i < cArgs; i++)
3589 {
3590 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3591 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3592 {
3593 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3594 {
3595 case kIemNativeWhat_Var:
3596 {
3597 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3598 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
3599 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3600 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3601 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3602
3603 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3604 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3605 else
3606 {
3607 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3608 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3609 }
3610 break;
3611 }
3612
3613 case kIemNativeWhat_Tmp:
3614 case kIemNativeWhat_Arg:
3615 case kIemNativeWhat_rc:
3616 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3617 default:
3618 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3619 }
3620
3621 }
3622 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3623 {
3624 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3625 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3626 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3627 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3628 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3629 }
3630 else
3631 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3634 }
3635 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3636 return true;
3637}
3638
3639
3640DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3641
3642
3643#if 0
3644/**
3645 * Frees a register assignment of any type.
3646 *
3647 * @param pReNative The native recompile state.
3648 * @param idxHstReg The register to free.
3649 *
3650 * @note Does not update variables.
3651 */
3652DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3653{
3654 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3655 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3656 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3657 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3658 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3659 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3660 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3661 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3662 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3663 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3664 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3665 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3666 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3667 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3668
3669 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3670 /* no flushing, right:
3671 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3672 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3673 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3674 */
3675}
3676#endif
3677
3678
3679/**
3680 * Frees a temporary register.
3681 *
3682 * Any shadow copies of guest registers assigned to the host register will not
3683 * be flushed by this operation.
3684 */
3685DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3686{
3687 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3688 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3689 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3690 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3691 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3692}
3693
3694
3695/**
3696 * Frees a temporary immediate register.
3697 *
3698 * It is assumed that the call has not modified the register, so it still hold
3699 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3700 */
3701DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3702{
3703 iemNativeRegFreeTmp(pReNative, idxHstReg);
3704}
3705
3706
3707/**
3708 * Frees a register assigned to a variable.
3709 *
3710 * The register will be disassociated from the variable.
3711 */
3712DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3713{
3714 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3715 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3716 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3718 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
3719
3720 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3721 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3722 if (!fFlushShadows)
3723 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
3724 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3725 else
3726 {
3727 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3728 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3729 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3730 uint64_t fGstRegShadows = fGstRegShadowsOld;
3731 while (fGstRegShadows)
3732 {
3733 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3734 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3735
3736 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3737 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3738 }
3739 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
3740 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3741 }
3742}
3743
3744
3745/**
3746 * Called right before emitting a call instruction to move anything important
3747 * out of call-volatile registers, free and flush the call-volatile registers,
3748 * optionally freeing argument variables.
3749 *
3750 * @returns New code buffer offset, UINT32_MAX on failure.
3751 * @param pReNative The native recompile state.
3752 * @param off The code buffer offset.
3753 * @param cArgs The number of arguments the function call takes.
3754 * It is presumed that the host register part of these have
3755 * been allocated as such already and won't need moving,
3756 * just freeing.
3757 */
3758DECL_HIDDEN_THROW(uint32_t)
3759iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3760{
3761 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
3762
3763 /*
3764 * Move anything important out of volatile registers.
3765 */
3766 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3767 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3768 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
3769#ifdef IEMNATIVE_REG_FIXED_TMP0
3770 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3771#endif
3772 & ~g_afIemNativeCallRegs[cArgs];
3773
3774 fRegsToMove &= pReNative->Core.bmHstRegs;
3775 if (!fRegsToMove)
3776 { /* likely */ }
3777 else
3778 {
3779 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
3780 while (fRegsToMove != 0)
3781 {
3782 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
3783 fRegsToMove &= ~RT_BIT_32(idxReg);
3784
3785 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3786 {
3787 case kIemNativeWhat_Var:
3788 {
3789 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3790 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
3791 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3792 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3793 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
3794 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
3795 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3796 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3797 else
3798 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3799 continue;
3800 }
3801
3802 case kIemNativeWhat_Arg:
3803 AssertMsgFailed(("What?!?: %u\n", idxReg));
3804 continue;
3805
3806 case kIemNativeWhat_rc:
3807 case kIemNativeWhat_Tmp:
3808 AssertMsgFailed(("Missing free: %u\n", idxReg));
3809 continue;
3810
3811 case kIemNativeWhat_FixedTmp:
3812 case kIemNativeWhat_pVCpuFixed:
3813 case kIemNativeWhat_pCtxFixed:
3814 case kIemNativeWhat_FixedReserved:
3815 case kIemNativeWhat_Invalid:
3816 case kIemNativeWhat_End:
3817 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3818 }
3819 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3820 }
3821 }
3822
3823 /*
3824 * Do the actual freeing.
3825 */
3826 if (pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3827 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n", pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3828 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3829
3830 /* If there are guest register shadows in any call-volatile register, we
3831 have to clear the corrsponding guest register masks for each register. */
3832 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3833 if (fHstRegsWithGstShadow)
3834 {
3835 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3836 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
3837 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3838 do
3839 {
3840 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3841 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3842
3843 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
3844 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3845 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3846 } while (fHstRegsWithGstShadow != 0);
3847 }
3848
3849 return off;
3850}
3851
3852
3853/**
3854 * Flushes a set of guest register shadow copies.
3855 *
3856 * This is usually done after calling a threaded function or a C-implementation
3857 * of an instruction.
3858 *
3859 * @param pReNative The native recompile state.
3860 * @param fGstRegs Set of guest registers to flush.
3861 */
3862DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3863{
3864 /*
3865 * Reduce the mask by what's currently shadowed
3866 */
3867 fGstRegs &= pReNative->Core.bmGstRegShadows;
3868 if (fGstRegs)
3869 {
3870 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n",
3871 fGstRegs, pReNative->Core.bmGstRegShadows, pReNative->Core.bmGstRegShadows & ~fGstRegs));
3872 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3873 if (pReNative->Core.bmGstRegShadows)
3874 {
3875 /*
3876 * Partial.
3877 */
3878 do
3879 {
3880 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3881 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3882 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3883 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3884 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3885
3886 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3887 fGstRegs &= ~fInThisHstReg;
3888 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3889 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3890 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3891 } while (fGstRegs != 0);
3892 }
3893 else
3894 {
3895 /*
3896 * Clear all.
3897 */
3898 do
3899 {
3900 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3901 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3902 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3903 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3904 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3905
3906 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3907 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3908 } while (fGstRegs != 0);
3909 pReNative->Core.bmHstRegsWithGstShadow = 0;
3910 }
3911 }
3912}
3913
3914
3915/**
3916 * Flushes delayed write of a specific guest register.
3917 *
3918 * This must be called prior to calling CImpl functions and any helpers that use
3919 * the guest state (like raising exceptions) and such.
3920 *
3921 * This optimization has not yet been implemented. The first target would be
3922 * RIP updates, since these are the most common ones.
3923 */
3924DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3925 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
3926{
3927 RT_NOREF(pReNative, enmClass, idxReg);
3928 return off;
3929}
3930
3931
3932/**
3933 * Flushes any delayed guest register writes.
3934 *
3935 * This must be called prior to calling CImpl functions and any helpers that use
3936 * the guest state (like raising exceptions) and such.
3937 *
3938 * This optimization has not yet been implemented. The first target would be
3939 * RIP updates, since these are the most common ones.
3940 */
3941DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3942{
3943 RT_NOREF(pReNative, off);
3944 return off;
3945}
3946
3947
3948#ifdef VBOX_STRICT
3949/**
3950 * Does internal register allocator sanity checks.
3951 */
3952static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
3953{
3954 /*
3955 * Iterate host registers building a guest shadowing set.
3956 */
3957 uint64_t bmGstRegShadows = 0;
3958 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
3959 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
3960 while (bmHstRegsWithGstShadow)
3961 {
3962 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
3963 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3964 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3965
3966 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3967 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
3968 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
3969 bmGstRegShadows |= fThisGstRegShadows;
3970 while (fThisGstRegShadows)
3971 {
3972 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
3973 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
3974 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
3975 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
3976 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
3977 }
3978 }
3979 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
3980 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
3981 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
3982
3983 /*
3984 * Now the other way around, checking the guest to host index array.
3985 */
3986 bmHstRegsWithGstShadow = 0;
3987 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
3988 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3989 while (bmGstRegShadows)
3990 {
3991 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
3992 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3993 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
3994
3995 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3996 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
3997 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
3998 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
3999 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4000 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4001 }
4002 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4003 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4004 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4005}
4006#endif
4007
4008
4009/*********************************************************************************************************************************
4010* Code Emitters (larger snippets) *
4011*********************************************************************************************************************************/
4012
4013/**
4014 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4015 * extending to 64-bit width.
4016 *
4017 * @returns New code buffer offset on success, UINT32_MAX on failure.
4018 * @param pReNative .
4019 * @param off The current code buffer position.
4020 * @param idxHstReg The host register to load the guest register value into.
4021 * @param enmGstReg The guest register to load.
4022 *
4023 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4024 * that is something the caller needs to do if applicable.
4025 */
4026DECL_HIDDEN_THROW(uint32_t)
4027iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4028{
4029 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4030 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4031
4032 switch (g_aGstShadowInfo[enmGstReg].cb)
4033 {
4034 case sizeof(uint64_t):
4035 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4036 case sizeof(uint32_t):
4037 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4038 case sizeof(uint16_t):
4039 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4040#if 0 /* not present in the table. */
4041 case sizeof(uint8_t):
4042 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4043#endif
4044 default:
4045 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4046 }
4047}
4048
4049
4050#ifdef VBOX_STRICT
4051/**
4052 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
4053 *
4054 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4055 * Trashes EFLAGS on AMD64.
4056 */
4057static uint32_t
4058iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4059{
4060# ifdef RT_ARCH_AMD64
4061 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4062
4063 /* rol reg64, 32 */
4064 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4065 pbCodeBuf[off++] = 0xc1;
4066 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4067 pbCodeBuf[off++] = 32;
4068
4069 /* test reg32, ffffffffh */
4070 if (idxReg >= 8)
4071 pbCodeBuf[off++] = X86_OP_REX_B;
4072 pbCodeBuf[off++] = 0xf7;
4073 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4074 pbCodeBuf[off++] = 0xff;
4075 pbCodeBuf[off++] = 0xff;
4076 pbCodeBuf[off++] = 0xff;
4077 pbCodeBuf[off++] = 0xff;
4078
4079 /* je/jz +1 */
4080 pbCodeBuf[off++] = 0x74;
4081 pbCodeBuf[off++] = 0x01;
4082
4083 /* int3 */
4084 pbCodeBuf[off++] = 0xcc;
4085
4086 /* rol reg64, 32 */
4087 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4088 pbCodeBuf[off++] = 0xc1;
4089 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4090 pbCodeBuf[off++] = 32;
4091
4092# elif defined(RT_ARCH_ARM64)
4093 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4094 /* lsr tmp0, reg64, #32 */
4095 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4096 /* cbz tmp0, +1 */
4097 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4098 /* brk #0x1100 */
4099 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4100
4101# else
4102# error "Port me!"
4103# endif
4104 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4105 return off;
4106}
4107#endif /* VBOX_STRICT */
4108
4109
4110#ifdef VBOX_STRICT
4111/**
4112 * Emitting code that checks that the content of register @a idxReg is the same
4113 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
4114 * instruction if that's not the case.
4115 *
4116 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4117 * Trashes EFLAGS on AMD64.
4118 */
4119static uint32_t
4120iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4121{
4122# ifdef RT_ARCH_AMD64
4123 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4124
4125 /* cmp reg, [mem] */
4126 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4127 {
4128 if (idxReg >= 8)
4129 pbCodeBuf[off++] = X86_OP_REX_R;
4130 pbCodeBuf[off++] = 0x38;
4131 }
4132 else
4133 {
4134 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4135 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4136 else
4137 {
4138 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4139 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4140 else
4141 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4142 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4143 if (idxReg >= 8)
4144 pbCodeBuf[off++] = X86_OP_REX_R;
4145 }
4146 pbCodeBuf[off++] = 0x39;
4147 }
4148 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4149
4150 /* je/jz +1 */
4151 pbCodeBuf[off++] = 0x74;
4152 pbCodeBuf[off++] = 0x01;
4153
4154 /* int3 */
4155 pbCodeBuf[off++] = 0xcc;
4156
4157 /* For values smaller than the register size, we must check that the rest
4158 of the register is all zeros. */
4159 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4160 {
4161 /* test reg64, imm32 */
4162 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4163 pbCodeBuf[off++] = 0xf7;
4164 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4165 pbCodeBuf[off++] = 0;
4166 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4167 pbCodeBuf[off++] = 0xff;
4168 pbCodeBuf[off++] = 0xff;
4169
4170 /* je/jz +1 */
4171 pbCodeBuf[off++] = 0x74;
4172 pbCodeBuf[off++] = 0x01;
4173
4174 /* int3 */
4175 pbCodeBuf[off++] = 0xcc;
4176 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4177 }
4178 else
4179 {
4180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4181 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4182 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4183 }
4184
4185# elif defined(RT_ARCH_ARM64)
4186 /* mov TMP0, [gstreg] */
4187 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4188
4189 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4190 /* sub tmp0, tmp0, idxReg */
4191 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4192 /* cbz tmp0, +1 */
4193 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4194 /* brk #0x1000+enmGstReg */
4195 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4197
4198# else
4199# error "Port me!"
4200# endif
4201 return off;
4202}
4203#endif /* VBOX_STRICT */
4204
4205
4206#ifdef VBOX_STRICT
4207/**
4208 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
4209 * important bits.
4210 *
4211 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4212 * Trashes EFLAGS on AMD64.
4213 */
4214static uint32_t
4215iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4216{
4217 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4218 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4219 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4220 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4221
4222#ifdef RT_ARCH_AMD64
4223 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4224
4225 /* je/jz +1 */
4226 pbCodeBuf[off++] = 0x74;
4227 pbCodeBuf[off++] = 0x01;
4228
4229 /* int3 */
4230 pbCodeBuf[off++] = 0xcc;
4231
4232# elif defined(RT_ARCH_ARM64)
4233 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4234
4235 /* b.eq +1 */
4236 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4237 /* brk #0x2000 */
4238 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4239
4240# else
4241# error "Port me!"
4242# endif
4243 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4244
4245 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4246 return off;
4247}
4248#endif /* VBOX_STRICT */
4249
4250
4251/**
4252 * Emits a code for checking the return code of a call and rcPassUp, returning
4253 * from the code if either are non-zero.
4254 */
4255DECL_HIDDEN_THROW(uint32_t)
4256iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
4257{
4258#ifdef RT_ARCH_AMD64
4259 /*
4260 * AMD64: eax = call status code.
4261 */
4262
4263 /* edx = rcPassUp */
4264 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4265# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4266 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
4267# endif
4268
4269 /* edx = eax | rcPassUp */
4270 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4271 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
4272 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
4273 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4274
4275 /* Jump to non-zero status return path. */
4276 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
4277
4278 /* done. */
4279
4280#elif RT_ARCH_ARM64
4281 /*
4282 * ARM64: w0 = call status code.
4283 */
4284# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4285 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
4286# endif
4287 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4288
4289 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4290
4291 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
4292
4293 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4294 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
4295 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
4296
4297#else
4298# error "port me"
4299#endif
4300 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4301 return off;
4302}
4303
4304
4305/**
4306 * Emits code to check if the content of @a idxAddrReg is a canonical address,
4307 * raising a \#GP(0) if it isn't.
4308 *
4309 * @returns New code buffer offset, UINT32_MAX on failure.
4310 * @param pReNative The native recompile state.
4311 * @param off The code buffer offset.
4312 * @param idxAddrReg The host register with the address to check.
4313 * @param idxInstr The current instruction.
4314 */
4315DECL_HIDDEN_THROW(uint32_t)
4316iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
4317{
4318 RT_NOREF(idxInstr);
4319
4320 /*
4321 * Make sure we don't have any outstanding guest register writes as we may
4322 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
4323 */
4324 off = iemNativeRegFlushPendingWrites(pReNative, off);
4325
4326#ifdef RT_ARCH_AMD64
4327 /*
4328 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
4329 * return raisexcpt();
4330 * ---- this wariant avoid loading a 64-bit immediate, but is an instruction longer.
4331 */
4332 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4333
4334 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
4335 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
4336 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
4337 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
4338
4339# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4340 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4341# else
4342 uint32_t const offFixup = off;
4343 off = iemNativeEmitJzToFixed(pReNative, off, 0);
4344 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
4345 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4346 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
4347# endif
4348
4349 iemNativeRegFreeTmp(pReNative, iTmpReg);
4350
4351#elif defined(RT_ARCH_ARM64)
4352 /*
4353 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
4354 * return raisexcpt();
4355 * ----
4356 * mov x1, 0x800000000000
4357 * add x1, x0, x1
4358 * cmp xzr, x1, lsr 48
4359 * and either:
4360 * b.ne .Lraisexcpt
4361 * or:
4362 * b.eq .Lnoexcept
4363 * movz x1, #instruction-number
4364 * b .Lraisexcpt
4365 * .Lnoexcept:
4366 */
4367 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4368
4369 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
4370 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
4371 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
4372
4373# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4374 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4375# else
4376 uint32_t const offFixup = off;
4377 off = iemNativeEmitJzToFixed(pReNative, off, 0);
4378 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
4379 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4380 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
4381# endif
4382
4383 iemNativeRegFreeTmp(pReNative, iTmpReg);
4384
4385#else
4386# error "Port me"
4387#endif
4388 return off;
4389}
4390
4391
4392/**
4393 * Emits code to check if the content of @a idxAddrReg is within the limit of
4394 * idxSegReg, raising a \#GP(0) if it isn't.
4395 *
4396 * @returns New code buffer offset; throws VBox status code on error.
4397 * @param pReNative The native recompile state.
4398 * @param off The code buffer offset.
4399 * @param idxAddrReg The host register (32-bit) with the address to
4400 * check.
4401 * @param idxSegReg The segment register (X86_SREG_XXX) to check
4402 * against.
4403 * @param idxInstr The current instruction.
4404 */
4405DECL_HIDDEN_THROW(uint32_t)
4406iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4407 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
4408{
4409 /*
4410 * Make sure we don't have any outstanding guest register writes as we may
4411 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
4412 */
4413 off = iemNativeRegFlushPendingWrites(pReNative, off);
4414
4415 /** @todo implement expand down/whatnot checking */
4416 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
4417
4418 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4419 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
4420 kIemNativeGstRegUse_ForUpdate);
4421
4422 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
4423
4424#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4425 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4426 RT_NOREF(idxInstr);
4427#else
4428 uint32_t const offFixup = off;
4429 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
4430 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
4431 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4432 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
4433#endif
4434
4435 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
4436 return off;
4437}
4438
4439
4440/**
4441 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
4442 *
4443 * @returns The flush mask.
4444 * @param fCImpl The IEM_CIMPL_F_XXX flags.
4445 * @param fGstShwFlush The starting flush mask.
4446 */
4447DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
4448{
4449 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
4450 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
4451 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
4452 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
4453 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
4454 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
4455 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
4456 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
4457 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
4458 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
4459 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
4460 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
4461 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4462 return fGstShwFlush;
4463}
4464
4465
4466/**
4467 * Emits a call to a CImpl function or something similar.
4468 */
4469static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
4470 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
4471 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
4472{
4473 /*
4474 * Flush stuff. PC and EFlags are implictly flushed, the latter because we
4475 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
4476 */
4477 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
4478 fGstShwFlush
4479 | RT_BIT_64(kIemNativeGstReg_Pc)
4480 | RT_BIT_64(kIemNativeGstReg_EFlags));
4481 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4482
4483 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4484
4485 /*
4486 * Load the parameters.
4487 */
4488#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
4489 /* Special code the hidden VBOXSTRICTRC pointer. */
4490 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4491 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4492 if (cAddParams > 0)
4493 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
4494 if (cAddParams > 1)
4495 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
4496 if (cAddParams > 2)
4497 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
4498 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4499
4500#else
4501 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4502 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4503 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4504 if (cAddParams > 0)
4505 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
4506 if (cAddParams > 1)
4507 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
4508 if (cAddParams > 2)
4509# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
4510 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
4511# else
4512 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
4513# endif
4514#endif
4515
4516 /*
4517 * Make the call.
4518 */
4519 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
4520
4521#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4522 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4523#endif
4524
4525 /*
4526 * Check the status code.
4527 */
4528 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4529}
4530
4531
4532/**
4533 * Emits a call to a threaded worker function.
4534 */
4535static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
4536{
4537 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
4538 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4539
4540#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4541 /* The threaded function may throw / long jmp, so set current instruction
4542 number if we're counting. */
4543 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4544#endif
4545
4546 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
4547
4548#ifdef RT_ARCH_AMD64
4549 /* Load the parameters and emit the call. */
4550# ifdef RT_OS_WINDOWS
4551# ifndef VBOXSTRICTRC_STRICT_ENABLED
4552 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4553 if (cParams > 0)
4554 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
4555 if (cParams > 1)
4556 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
4557 if (cParams > 2)
4558 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
4559# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
4560 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
4561 if (cParams > 0)
4562 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
4563 if (cParams > 1)
4564 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
4565 if (cParams > 2)
4566 {
4567 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
4568 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
4569 }
4570 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4571# endif /* VBOXSTRICTRC_STRICT_ENABLED */
4572# else
4573 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4574 if (cParams > 0)
4575 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
4576 if (cParams > 1)
4577 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
4578 if (cParams > 2)
4579 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
4580# endif
4581
4582 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4583
4584# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4585 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4586# endif
4587
4588#elif RT_ARCH_ARM64
4589 /*
4590 * ARM64:
4591 */
4592 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4593 if (cParams > 0)
4594 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
4595 if (cParams > 1)
4596 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
4597 if (cParams > 2)
4598 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
4599
4600 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4601
4602#else
4603# error "port me"
4604#endif
4605
4606 /*
4607 * Check the status code.
4608 */
4609 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
4610
4611 return off;
4612}
4613
4614
4615/**
4616 * Emits the code at the RaiseGP0 label.
4617 */
4618static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4619{
4620 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
4621 if (idxLabel != UINT32_MAX)
4622 {
4623 iemNativeLabelDefine(pReNative, idxLabel, off);
4624
4625 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
4626 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4627#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4628 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
4629#endif
4630 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
4631
4632 /* jump back to the return sequence. */
4633 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4634 }
4635 return off;
4636}
4637
4638
4639/**
4640 * Emits the code at the ReturnWithFlags label (returns
4641 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
4642 */
4643static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4644{
4645 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
4646 if (idxLabel != UINT32_MAX)
4647 {
4648 iemNativeLabelDefine(pReNative, idxLabel, off);
4649
4650 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
4651
4652 /* jump back to the return sequence. */
4653 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4654 }
4655 return off;
4656}
4657
4658
4659/**
4660 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
4661 */
4662static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4663{
4664 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
4665 if (idxLabel != UINT32_MAX)
4666 {
4667 iemNativeLabelDefine(pReNative, idxLabel, off);
4668
4669 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
4670
4671 /* jump back to the return sequence. */
4672 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4673 }
4674 return off;
4675}
4676
4677
4678/**
4679 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
4680 */
4681static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4682{
4683 /*
4684 * Generate the rc + rcPassUp fiddling code if needed.
4685 */
4686 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4687 if (idxLabel != UINT32_MAX)
4688 {
4689 iemNativeLabelDefine(pReNative, idxLabel, off);
4690
4691 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
4692#ifdef RT_ARCH_AMD64
4693# ifdef RT_OS_WINDOWS
4694# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4695 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
4696# endif
4697 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4698 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
4699# else
4700 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4701 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
4702# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4703 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
4704# endif
4705# endif
4706# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4707 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
4708# endif
4709
4710#else
4711 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
4712 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4713 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
4714#endif
4715
4716 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
4717 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4718 }
4719 return off;
4720}
4721
4722
4723/**
4724 * Emits a standard epilog.
4725 */
4726static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
4727{
4728 *pidxReturnLabel = UINT32_MAX;
4729
4730 /*
4731 * Successful return, so clear the return register (eax, w0).
4732 */
4733 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
4734
4735 /*
4736 * Define label for common return point.
4737 */
4738 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
4739 *pidxReturnLabel = idxReturn;
4740
4741 /*
4742 * Restore registers and return.
4743 */
4744#ifdef RT_ARCH_AMD64
4745 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4746
4747 /* Reposition esp at the r15 restore point. */
4748 pbCodeBuf[off++] = X86_OP_REX_W;
4749 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
4750 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
4751 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
4752
4753 /* Pop non-volatile registers and return */
4754 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
4755 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
4756 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
4757 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
4758 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
4759 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
4760 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
4761 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
4762# ifdef RT_OS_WINDOWS
4763 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
4764 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
4765# endif
4766 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
4767 pbCodeBuf[off++] = 0xc9; /* leave */
4768 pbCodeBuf[off++] = 0xc3; /* ret */
4769 pbCodeBuf[off++] = 0xcc; /* int3 poison */
4770
4771#elif RT_ARCH_ARM64
4772 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4773
4774 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
4775 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
4776 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4777 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4778 IEMNATIVE_FRAME_VAR_SIZE / 8);
4779 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
4780 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4781 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4782 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4783 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4784 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4785 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4786 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4787 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4788 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4789 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4790 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4791
4792 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
4793 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
4794 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
4795 IEMNATIVE_FRAME_SAVE_REG_SIZE);
4796
4797 /* retab / ret */
4798# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
4799 if (1)
4800 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
4801 else
4802# endif
4803 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
4804
4805#else
4806# error "port me"
4807#endif
4808 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4809
4810 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
4811}
4812
4813
4814/**
4815 * Emits a standard prolog.
4816 */
4817static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4818{
4819#ifdef RT_ARCH_AMD64
4820 /*
4821 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
4822 * reserving 64 bytes for stack variables plus 4 non-register argument
4823 * slots. Fixed register assignment: xBX = pReNative;
4824 *
4825 * Since we always do the same register spilling, we can use the same
4826 * unwind description for all the code.
4827 */
4828 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4829 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
4830 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
4831 pbCodeBuf[off++] = 0x8b;
4832 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
4833 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
4834 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
4835# ifdef RT_OS_WINDOWS
4836 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
4837 pbCodeBuf[off++] = 0x8b;
4838 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
4839 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
4840 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
4841# else
4842 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
4843 pbCodeBuf[off++] = 0x8b;
4844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
4845# endif
4846 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
4847 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
4848 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
4849 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
4850 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
4851 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
4852 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
4853 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
4854
4855 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
4856 X86_GREG_xSP,
4857 IEMNATIVE_FRAME_ALIGN_SIZE
4858 + IEMNATIVE_FRAME_VAR_SIZE
4859 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
4860 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
4861 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
4862 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
4863 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
4864
4865#elif RT_ARCH_ARM64
4866 /*
4867 * We set up a stack frame exactly like on x86, only we have to push the
4868 * return address our selves here. We save all non-volatile registers.
4869 */
4870 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4871
4872# ifdef RT_OS_DARWIN /** @todo This seems to be requirement by libunwind for JIT FDEs. Investigate further as been unable
4873 * to figure out where the BRK following AUTHB*+XPACB* stuff comes from in libunwind. It's
4874 * definitely the dwarf stepping code, but till found it's very tedious to figure out whether it's
4875 * in any way conditional, so just emitting this instructions now and hoping for the best... */
4876 /* pacibsp */
4877 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
4878# endif
4879
4880 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
4881 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
4882 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4883 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4884 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
4885 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
4886 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4887 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4888 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4889 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4890 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4891 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4892 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4893 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4894 /* Save the BP and LR (ret address) registers at the top of the frame. */
4895 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4896 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4897 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4898 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
4899 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
4900 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
4901
4902 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
4903 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
4904
4905 /* mov r28, r0 */
4906 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
4907 /* mov r27, r1 */
4908 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
4909
4910#else
4911# error "port me"
4912#endif
4913 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4914 return off;
4915}
4916
4917
4918
4919
4920/*********************************************************************************************************************************
4921* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
4922*********************************************************************************************************************************/
4923
4924#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
4925 { \
4926 Assert(pReNative->Core.bmVars == 0); \
4927 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
4928 Assert(pReNative->Core.bmStack == 0); \
4929 pReNative->fMc = (a_fMcFlags); \
4930 pReNative->fCImpl = (a_fCImplFlags); \
4931 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
4932
4933/** We have to get to the end in recompilation mode, as otherwise we won't
4934 * generate code for all the IEM_MC_IF_XXX branches. */
4935#define IEM_MC_END() \
4936 iemNativeVarFreeAll(pReNative); \
4937 } return off
4938
4939
4940
4941/*********************************************************************************************************************************
4942* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
4943*********************************************************************************************************************************/
4944
4945#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
4946 pReNative->fMc = 0; \
4947 pReNative->fCImpl = (a_fFlags); \
4948 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
4949
4950
4951#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4952 pReNative->fMc = 0; \
4953 pReNative->fCImpl = (a_fFlags); \
4954 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
4955
4956DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4957 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4958 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
4959{
4960 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
4961}
4962
4963
4964#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4965 pReNative->fMc = 0; \
4966 pReNative->fCImpl = (a_fFlags); \
4967 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4968 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
4969
4970DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4971 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4972 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
4973{
4974 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
4975}
4976
4977
4978#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4979 pReNative->fMc = 0; \
4980 pReNative->fCImpl = (a_fFlags); \
4981 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4982 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
4983
4984DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4985 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4986 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
4987 uint64_t uArg2)
4988{
4989 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
4990}
4991
4992
4993
4994/*********************************************************************************************************************************
4995* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
4996*********************************************************************************************************************************/
4997
4998/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
4999 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5000DECL_INLINE_THROW(uint32_t)
5001iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5002{
5003 /*
5004 * If its not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5005 * return with special status code and make the execution loop deal with
5006 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5007 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5008 * could continue w/o interruption, it probably will drop into the
5009 * debugger, so not worth the effort of trying to services it here and we
5010 * just lump it in with the handling of the others.
5011 *
5012 * To simplify the code and the register state management even more (wrt
5013 * immediate in AND operation), we always update the flags and skip the
5014 * extra check associated conditional jump.
5015 */
5016 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5017 <= UINT32_MAX);
5018 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5019 kIemNativeGstRegUse_ForUpdate);
5020 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5021 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5022 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5023 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5024 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5025
5026 /* Free but don't flush the EFLAGS register. */
5027 iemNativeRegFreeTmp(pReNative, idxEflReg);
5028
5029 return off;
5030}
5031
5032
5033#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
5034 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5035
5036#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
5037 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
5038 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5039
5040/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5041DECL_INLINE_THROW(uint32_t)
5042iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5043{
5044 /* Allocate a temporary PC register. */
5045 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5046
5047 /* Perform the addition and store the result. */
5048 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5049 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5050
5051 /* Free but don't flush the PC register. */
5052 iemNativeRegFreeTmp(pReNative, idxPcReg);
5053
5054 return off;
5055}
5056
5057
5058#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
5059 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5060
5061#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
5062 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
5063 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5064
5065/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5066DECL_INLINE_THROW(uint32_t)
5067iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5068{
5069 /* Allocate a temporary PC register. */
5070 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5071
5072 /* Perform the addition and store the result. */
5073 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5074 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5075
5076 /* Free but don't flush the PC register. */
5077 iemNativeRegFreeTmp(pReNative, idxPcReg);
5078
5079 return off;
5080}
5081
5082
5083#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
5084 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5085
5086#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
5087 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
5088 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5089
5090/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5091DECL_INLINE_THROW(uint32_t)
5092iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5093{
5094 /* Allocate a temporary PC register. */
5095 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5096
5097 /* Perform the addition and store the result. */
5098 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5099 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5100 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5101
5102 /* Free but don't flush the PC register. */
5103 iemNativeRegFreeTmp(pReNative, idxPcReg);
5104
5105 return off;
5106}
5107
5108
5109
5110/*********************************************************************************************************************************
5111* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5112*********************************************************************************************************************************/
5113
5114#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
5115 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5116 (a_enmEffOpSize), pCallEntry->idxInstr)
5117
5118#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5119 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
5120 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5121
5122#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
5123 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5124 IEMMODE_16BIT, pCallEntry->idxInstr)
5125
5126#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
5127 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
5128 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5129
5130#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
5131 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5132 IEMMODE_64BIT, pCallEntry->idxInstr)
5133
5134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
5135 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
5136 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5137
5138/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5139 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5140 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5141DECL_INLINE_THROW(uint32_t)
5142iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5143 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5144{
5145 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
5146
5147 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
5148 off = iemNativeRegFlushPendingWrites(pReNative, off);
5149
5150 /* Allocate a temporary PC register. */
5151 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5152
5153 /* Perform the addition. */
5154 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5155
5156 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5157 {
5158 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5159 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5160 }
5161 else
5162 {
5163 /* Just truncate the result to 16-bit IP. */
5164 Assert(enmEffOpSize == IEMMODE_16BIT);
5165 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5166 }
5167 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5168
5169 /* Free but don't flush the PC register. */
5170 iemNativeRegFreeTmp(pReNative, idxPcReg);
5171
5172 return off;
5173}
5174
5175
5176#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
5177 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5178 (a_enmEffOpSize), pCallEntry->idxInstr)
5179
5180#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5181 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
5182 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5183
5184#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
5185 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5186 IEMMODE_16BIT, pCallEntry->idxInstr)
5187
5188#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
5189 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
5190 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5191
5192#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
5193 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5194 IEMMODE_32BIT, pCallEntry->idxInstr)
5195
5196#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
5197 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
5198 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5199
5200/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
5201 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
5202 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
5203DECL_INLINE_THROW(uint32_t)
5204iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5205 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5206{
5207 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
5208
5209 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
5210 off = iemNativeRegFlushPendingWrites(pReNative, off);
5211
5212 /* Allocate a temporary PC register. */
5213 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5214
5215 /* Perform the addition. */
5216 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5217
5218 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
5219 if (enmEffOpSize == IEMMODE_16BIT)
5220 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5221
5222 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
5223 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5224
5225 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5226
5227 /* Free but don't flush the PC register. */
5228 iemNativeRegFreeTmp(pReNative, idxPcReg);
5229
5230 return off;
5231}
5232
5233
5234#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
5235 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
5236
5237#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
5238 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
5239 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5240
5241#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
5242 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
5243
5244#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
5245 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
5246 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5247
5248#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
5249 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
5250
5251#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
5252 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
5253 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5254
5255/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
5256DECL_INLINE_THROW(uint32_t)
5257iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5258 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
5259{
5260 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
5261 off = iemNativeRegFlushPendingWrites(pReNative, off);
5262
5263 /* Allocate a temporary PC register. */
5264 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5265
5266 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
5267 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5268 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5269 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5270 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5271
5272 /* Free but don't flush the PC register. */
5273 iemNativeRegFreeTmp(pReNative, idxPcReg);
5274
5275 return off;
5276}
5277
5278
5279
5280/*********************************************************************************************************************************
5281* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
5282*********************************************************************************************************************************/
5283
5284/**
5285 * Pushes an IEM_MC_IF_XXX onto the condition stack.
5286 *
5287 * @returns Pointer to the condition stack entry on success, NULL on failure
5288 * (too many nestings)
5289 */
5290DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
5291{
5292 uint32_t const idxStack = pReNative->cCondDepth;
5293 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
5294
5295 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
5296 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
5297
5298 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
5299 pEntry->fInElse = false;
5300 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
5301 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
5302
5303 return pEntry;
5304}
5305
5306
5307/**
5308 * Start of the if-block, snapshotting the register and variable state.
5309 */
5310DECL_INLINE_THROW(void)
5311iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
5312{
5313 Assert(offIfBlock != UINT32_MAX);
5314 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5315 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5316 Assert(!pEntry->fInElse);
5317
5318 /* Define the start of the IF block if request or for disassembly purposes. */
5319 if (idxLabelIf != UINT32_MAX)
5320 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
5321#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5322 else
5323 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
5324#else
5325 RT_NOREF(offIfBlock);
5326#endif
5327
5328 /* Copy the initial state so we can restore it in the 'else' block. */
5329 pEntry->InitialState = pReNative->Core;
5330}
5331
5332
5333#define IEM_MC_ELSE() } while (0); \
5334 off = iemNativeEmitElse(pReNative, off); \
5335 do {
5336
5337/** Emits code related to IEM_MC_ELSE. */
5338DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5339{
5340 /* Check sanity and get the conditional stack entry. */
5341 Assert(off != UINT32_MAX);
5342 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5343 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5344 Assert(!pEntry->fInElse);
5345
5346 /* Jump to the endif */
5347 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
5348
5349 /* Define the else label and enter the else part of the condition. */
5350 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5351 pEntry->fInElse = true;
5352
5353 /* Snapshot the core state so we can do a merge at the endif and restore
5354 the snapshot we took at the start of the if-block. */
5355 pEntry->IfFinalState = pReNative->Core;
5356 pReNative->Core = pEntry->InitialState;
5357
5358 return off;
5359}
5360
5361
5362#define IEM_MC_ENDIF() } while (0); \
5363 off = iemNativeEmitEndIf(pReNative, off)
5364
5365/** Emits code related to IEM_MC_ENDIF. */
5366DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5367{
5368 /* Check sanity and get the conditional stack entry. */
5369 Assert(off != UINT32_MAX);
5370 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5371 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5372
5373 /*
5374 * Now we have find common group with the core state at the end of the
5375 * if-final. Use the smallest common denominator and just drop anything
5376 * that isn't the same in both states.
5377 */
5378 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
5379 * which is why we're doing this at the end of the else-block.
5380 * But we'd need more info about future for that to be worth the effort. */
5381 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
5382 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
5383 {
5384 /* shadow guest stuff first. */
5385 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
5386 if (fGstRegs)
5387 {
5388 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
5389 do
5390 {
5391 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5392 fGstRegs &= ~RT_BIT_64(idxGstReg);
5393
5394 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5395 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
5396 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
5397 {
5398 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
5399 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
5400 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
5401 }
5402 } while (fGstRegs);
5403 }
5404 else
5405 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
5406
5407 /* Check variables next. For now we must require them to be identical
5408 or stuff we can recreate. */
5409 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
5410 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
5411 if (fVars)
5412 {
5413 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
5414 do
5415 {
5416 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
5417 fVars &= ~RT_BIT_32(idxVar);
5418
5419 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
5420 {
5421 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
5422 continue;
5423 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5424 {
5425 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5426 if (idxHstReg != UINT8_MAX)
5427 {
5428 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5429 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5430 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
5431 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5432 }
5433 continue;
5434 }
5435 }
5436 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
5437 continue;
5438
5439 /* Irreconcilable, so drop it. */
5440 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5441 if (idxHstReg != UINT8_MAX)
5442 {
5443 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5444 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5445 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
5446 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5447 }
5448 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
5449 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5450 } while (fVars);
5451 }
5452
5453 /* Finally, check that the host register allocations matches. */
5454 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
5455 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
5456 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
5457 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
5458 }
5459
5460 /*
5461 * Define the endif label and maybe the else one if we're still in the 'if' part.
5462 */
5463 if (!pEntry->fInElse)
5464 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5465 else
5466 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
5467 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
5468
5469 /* Pop the conditional stack.*/
5470 pReNative->cCondDepth -= 1;
5471
5472 return off;
5473}
5474
5475
5476#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
5477 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
5478 do {
5479
5480/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
5481DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5482{
5483 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5484
5485 /* Get the eflags. */
5486 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5487 kIemNativeGstRegUse_ReadOnly);
5488
5489 /* Test and jump. */
5490 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5491
5492 /* Free but don't flush the EFlags register. */
5493 iemNativeRegFreeTmp(pReNative, idxEflReg);
5494
5495 /* Make a copy of the core state now as we start the if-block. */
5496 iemNativeCondStartIfBlock(pReNative, off);
5497
5498 return off;
5499}
5500
5501
5502#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
5503 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
5504 do {
5505
5506/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
5507DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5508{
5509 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5510
5511 /* Get the eflags. */
5512 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5513 kIemNativeGstRegUse_ReadOnly);
5514
5515 /* Test and jump. */
5516 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5517
5518 /* Free but don't flush the EFlags register. */
5519 iemNativeRegFreeTmp(pReNative, idxEflReg);
5520
5521 /* Make a copy of the core state now as we start the if-block. */
5522 iemNativeCondStartIfBlock(pReNative, off);
5523
5524 return off;
5525}
5526
5527
5528#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
5529 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
5530 do {
5531
5532/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
5533DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5534{
5535 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5536
5537 /* Get the eflags. */
5538 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5539 kIemNativeGstRegUse_ReadOnly);
5540
5541 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5542 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5543
5544 /* Test and jump. */
5545 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5546
5547 /* Free but don't flush the EFlags register. */
5548 iemNativeRegFreeTmp(pReNative, idxEflReg);
5549
5550 /* Make a copy of the core state now as we start the if-block. */
5551 iemNativeCondStartIfBlock(pReNative, off);
5552
5553 return off;
5554}
5555
5556
5557#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
5558 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
5559 do {
5560
5561/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
5562DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5563{
5564 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5565
5566 /* Get the eflags. */
5567 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5568 kIemNativeGstRegUse_ReadOnly);
5569
5570 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5571 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5572
5573 /* Test and jump. */
5574 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5575
5576 /* Free but don't flush the EFlags register. */
5577 iemNativeRegFreeTmp(pReNative, idxEflReg);
5578
5579 /* Make a copy of the core state now as we start the if-block. */
5580 iemNativeCondStartIfBlock(pReNative, off);
5581
5582 return off;
5583}
5584
5585
5586#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
5587 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
5588 do {
5589
5590#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
5591 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
5592 do {
5593
5594/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
5595DECL_INLINE_THROW(uint32_t)
5596iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5597 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
5598{
5599 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5600
5601 /* Get the eflags. */
5602 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5603 kIemNativeGstRegUse_ReadOnly);
5604
5605 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
5606 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
5607
5608 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
5609 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
5610 Assert(iBitNo1 != iBitNo2);
5611
5612#ifdef RT_ARCH_AMD64
5613 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
5614
5615 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5616 if (iBitNo1 > iBitNo2)
5617 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
5618 else
5619 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
5620 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5621
5622#elif defined(RT_ARCH_ARM64)
5623 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5624 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5625
5626 /* and tmpreg, eflreg, #1<<iBitNo1 */
5627 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
5628
5629 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
5630 if (iBitNo1 > iBitNo2)
5631 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5632 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
5633 else
5634 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5635 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
5636
5637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5638
5639#else
5640# error "Port me"
5641#endif
5642
5643 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
5644 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
5645 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
5646
5647 /* Free but don't flush the EFlags and tmp registers. */
5648 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5649 iemNativeRegFreeTmp(pReNative, idxEflReg);
5650
5651 /* Make a copy of the core state now as we start the if-block. */
5652 iemNativeCondStartIfBlock(pReNative, off);
5653
5654 return off;
5655}
5656
5657
5658#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
5659 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
5660 do {
5661
5662#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
5663 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
5664 do {
5665
5666/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
5667 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
5668DECL_INLINE_THROW(uint32_t)
5669iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
5670 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
5671{
5672 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5673
5674 /* We need an if-block label for the non-inverted variant. */
5675 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
5676 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
5677
5678 /* Get the eflags. */
5679 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5680 kIemNativeGstRegUse_ReadOnly);
5681
5682 /* Translate the flag masks to bit numbers. */
5683 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5684 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5685
5686 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
5687 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
5688 Assert(iBitNo1 != iBitNo);
5689
5690 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
5691 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
5692 Assert(iBitNo2 != iBitNo);
5693 Assert(iBitNo2 != iBitNo1);
5694
5695#ifdef RT_ARCH_AMD64
5696 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
5697#elif defined(RT_ARCH_ARM64)
5698 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5699#endif
5700
5701 /* Check for the lone bit first. */
5702 if (!fInverted)
5703 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5704 else
5705 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
5706
5707 /* Then extract and compare the other two bits. */
5708#ifdef RT_ARCH_AMD64
5709 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5710 if (iBitNo1 > iBitNo2)
5711 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
5712 else
5713 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
5714 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5715
5716#elif defined(RT_ARCH_ARM64)
5717 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5718
5719 /* and tmpreg, eflreg, #1<<iBitNo1 */
5720 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
5721
5722 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
5723 if (iBitNo1 > iBitNo2)
5724 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5725 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
5726 else
5727 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5728 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
5729
5730 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5731
5732#else
5733# error "Port me"
5734#endif
5735
5736 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
5737 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
5738 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
5739
5740 /* Free but don't flush the EFlags and tmp registers. */
5741 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5742 iemNativeRegFreeTmp(pReNative, idxEflReg);
5743
5744 /* Make a copy of the core state now as we start the if-block. */
5745 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
5746
5747 return off;
5748}
5749
5750
5751#define IEM_MC_IF_CX_IS_NZ() \
5752 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
5753 do {
5754
5755/** Emits code for IEM_MC_IF_CX_IS_NZ. */
5756DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5757{
5758 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5759
5760 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5761 kIemNativeGstRegUse_ReadOnly);
5762 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5763 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5764
5765 iemNativeCondStartIfBlock(pReNative, off);
5766 return off;
5767}
5768
5769
5770#define IEM_MC_IF_ECX_IS_NZ() \
5771 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
5772 do {
5773
5774#define IEM_MC_IF_RCX_IS_NZ() \
5775 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
5776 do {
5777
5778/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
5779DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
5780{
5781 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5782
5783 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5784 kIemNativeGstRegUse_ReadOnly);
5785 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5786 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5787
5788 iemNativeCondStartIfBlock(pReNative, off);
5789 return off;
5790}
5791
5792
5793#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5794 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
5795 do {
5796
5797#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5798 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
5799 do {
5800
5801/** Emits code for IEM_MC_IF_CX_IS_NZ. */
5802DECL_INLINE_THROW(uint32_t)
5803iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
5804{
5805 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5806
5807 /* We have to load both RCX and EFLAGS before we can start branching,
5808 otherwise we'll end up in the else-block with an inconsistent
5809 register allocator state.
5810 Doing EFLAGS first as it's more likely to be loaded, right? */
5811 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5812 kIemNativeGstRegUse_ReadOnly);
5813 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5814 kIemNativeGstRegUse_ReadOnly);
5815
5816 /** @todo we could reduce this to a single branch instruction by spending a
5817 * temporary register and some setnz stuff. Not sure if loops are
5818 * worth it. */
5819 /* Check CX. */
5820 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5821
5822 /* Check the EFlags bit. */
5823 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5824 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5825 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5826 !fCheckIfSet /*fJmpIfSet*/);
5827
5828 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5829 iemNativeRegFreeTmp(pReNative, idxEflReg);
5830
5831 iemNativeCondStartIfBlock(pReNative, off);
5832 return off;
5833}
5834
5835
5836#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5837 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
5838 do {
5839
5840#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5841 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
5842 do {
5843
5844#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5845 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
5846 do {
5847
5848#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5849 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
5850 do {
5851
5852/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
5853 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
5854 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
5855 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
5856DECL_INLINE_THROW(uint32_t)
5857iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5858 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
5859{
5860 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5861
5862 /* We have to load both RCX and EFLAGS before we can start branching,
5863 otherwise we'll end up in the else-block with an inconsistent
5864 register allocator state.
5865 Doing EFLAGS first as it's more likely to be loaded, right? */
5866 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5867 kIemNativeGstRegUse_ReadOnly);
5868 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5869 kIemNativeGstRegUse_ReadOnly);
5870
5871 /** @todo we could reduce this to a single branch instruction by spending a
5872 * temporary register and some setnz stuff. Not sure if loops are
5873 * worth it. */
5874 /* Check RCX/ECX. */
5875 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5876
5877 /* Check the EFlags bit. */
5878 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5879 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5880 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5881 !fCheckIfSet /*fJmpIfSet*/);
5882
5883 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5884 iemNativeRegFreeTmp(pReNative, idxEflReg);
5885
5886 iemNativeCondStartIfBlock(pReNative, off);
5887 return off;
5888}
5889
5890
5891
5892/*********************************************************************************************************************************
5893* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
5894*********************************************************************************************************************************/
5895/** Number of hidden arguments for CIMPL calls.
5896 * @note We're sufferning from the usual VBOXSTRICTRC fun on Windows. */
5897#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5898# define IEM_CIMPL_HIDDEN_ARGS 3
5899#else
5900# define IEM_CIMPL_HIDDEN_ARGS 2
5901#endif
5902
5903#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
5904 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
5905
5906#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
5907 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
5908
5909#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
5910 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
5911
5912#define IEM_MC_LOCAL(a_Type, a_Name) \
5913 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
5914
5915#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
5916 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
5917
5918
5919/**
5920 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
5921 */
5922DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
5923{
5924 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
5925 return IEM_CIMPL_HIDDEN_ARGS;
5926 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
5927 return 1;
5928 return 0;
5929}
5930
5931
5932/**
5933 * Internal work that allocates a variable with kind set to
5934 * kIemNativeVarKind_Invalid and no current stack allocation.
5935 *
5936 * The kind will either be set by the caller or later when the variable is first
5937 * assigned a value.
5938 */
5939static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5940{
5941 Assert(cbType > 0 && cbType <= 64);
5942 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
5943 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
5944 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
5945 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5946 pReNative->Core.aVars[idxVar].cbVar = cbType;
5947 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5948 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5949 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
5950 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
5951 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
5952 pReNative->Core.aVars[idxVar].u.uValue = 0;
5953 return idxVar;
5954}
5955
5956
5957/**
5958 * Internal work that allocates an argument variable w/o setting enmKind.
5959 */
5960static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5961{
5962 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
5963 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5964 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
5965
5966 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5967 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
5968 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
5969 return idxVar;
5970}
5971
5972
5973/**
5974 * Gets the stack slot for a stack variable, allocating one if necessary.
5975 *
5976 * Calling this function implies that the stack slot will contain a valid
5977 * variable value. The caller deals with any register currently assigned to the
5978 * variable, typically by spilling it into the stack slot.
5979 *
5980 * @returns The stack slot number.
5981 * @param pReNative The recompiler state.
5982 * @param idxVar The variable.
5983 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
5984 */
5985DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5986{
5987 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5988 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5989
5990 /* Already got a slot? */
5991 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5992 if (idxStackSlot != UINT8_MAX)
5993 {
5994 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
5995 return idxStackSlot;
5996 }
5997
5998 /*
5999 * A single slot is easy to allocate.
6000 * Allocate them from the top end, closest to BP, to reduce the displacement.
6001 */
6002 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6003 {
6004 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6005 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6006 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6007 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6008 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6009 return (uint8_t)iSlot;
6010 }
6011
6012 /*
6013 * We need more than one stack slot.
6014 *
6015 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6016 */
6017 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6018 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6019 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6020 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
6021 uint32_t bmStack = ~pReNative->Core.bmStack;
6022 while (bmStack != UINT32_MAX)
6023 {
6024/** @todo allocate from the top to reduce BP displacement. */
6025 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6026 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6027 if (!(iSlot & fBitAlignMask))
6028 {
6029 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6030 {
6031 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6032 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6033 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
6034 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
6035 return (uint8_t)iSlot;
6036 }
6037 }
6038 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6039 }
6040 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6041}
6042
6043
6044/**
6045 * Changes the variable to a stack variable.
6046 *
6047 * Currently this is s only possible to do the first time the variable is used,
6048 * switching later is can be implemented but not done.
6049 *
6050 * @param pReNative The recompiler state.
6051 * @param idxVar The variable.
6052 * @throws VERR_IEM_VAR_IPE_2
6053 */
6054static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6055{
6056 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6057 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6058 {
6059 /* We could in theory transition from immediate to stack as well, but it
6060 would involve the caller doing work storing the value on the stack. So,
6061 till that's required we only allow transition from invalid. */
6062 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6063 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6064 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6065 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
6066
6067 /* Note! We don't allocate a stack slot here, that's only done when a
6068 slot is actually needed to hold a variable value. */
6069 }
6070}
6071
6072
6073/**
6074 * Sets it to a variable with a constant value.
6075 *
6076 * This does not require stack storage as we know the value and can always
6077 * reload it, unless of course it's referenced.
6078 *
6079 * @param pReNative The recompiler state.
6080 * @param idxVar The variable.
6081 * @param uValue The immediate value.
6082 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6083 */
6084static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6085{
6086 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6087 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
6088 {
6089 /* Only simple transitions for now. */
6090 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6091 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6092 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
6093 }
6094 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6095
6096 pReNative->Core.aVars[idxVar].u.uValue = uValue;
6097}
6098
6099
6100/**
6101 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6102 *
6103 * This does not require stack storage as we know the value and can always
6104 * reload it. Loading is postponed till needed.
6105 *
6106 * @param pReNative The recompiler state.
6107 * @param idxVar The variable.
6108 * @param idxOtherVar The variable to take the (stack) address of.
6109 *
6110 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6111 */
6112static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6113{
6114 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6115 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6116
6117 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6118 {
6119 /* Only simple transitions for now. */
6120 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6121 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6122 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6123 }
6124 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6125
6126 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
6127
6128 /* Update the other variable, ensure it's a stack variable. */
6129 /** @todo handle variables with const values... that'll go boom now. */
6130 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6131 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
6132}
6133
6134
6135/**
6136 * Sets the variable to a reference (pointer) to a guest register reference.
6137 *
6138 * This does not require stack storage as we know the value and can always
6139 * reload it. Loading is postponed till needed.
6140 *
6141 * @param pReNative The recompiler state.
6142 * @param idxVar The variable.
6143 * @param enmRegClass The class guest registers to reference.
6144 * @param idxReg The register within @a enmRegClass to reference.
6145 *
6146 * @throws VERR_IEM_VAR_IPE_2
6147 */
6148static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6149 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6150{
6151 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6152
6153 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
6154 {
6155 /* Only simple transitions for now. */
6156 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6157 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6158 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
6159 }
6160 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6161
6162 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
6163 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
6164}
6165
6166
6167DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6168{
6169 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6170}
6171
6172
6173DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6174{
6175 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6176 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6177 return idxVar;
6178}
6179
6180
6181DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6182{
6183 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6184 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6185 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6186 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6187
6188 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6189 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
6190 return idxArgVar;
6191}
6192
6193
6194DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6195{
6196 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6197 /* Don't set to stack now, leave that to the first use as for instance
6198 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6199 return idxVar;
6200}
6201
6202
6203DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6204{
6205 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6206 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6207 return idxVar;
6208}
6209
6210
6211/**
6212 * Makes sure variable @a idxVar has a register assigned to it.
6213 *
6214 * @returns The host register number.
6215 * @param pReNative The recompiler state.
6216 * @param idxVar The variable.
6217 * @param poff Pointer to the instruction buffer offset.
6218 * In case a register needs to be freed up or the value
6219 * loaded off the stack.
6220 * @param fInitialized Set if the variable must already have been initialized.
6221 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6222 * the case.
6223 */
6224DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6225 uint32_t *poff, bool fInitialized = false)
6226{
6227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6228 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
6229/** @todo we must mark the variable as active and add a release function to
6230 * mark it as inactive, otherwise temporary register allocations may
6231 * cause the variable to be spilled onto the stack. */
6232
6233 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6234 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6235 {
6236 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
6237 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6238 return idxReg;
6239 }
6240
6241 /*
6242 * If the kind of variable has not yet been set, default to 'stack'.
6243 */
6244 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
6245 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6246 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
6247 iemNativeVarSetKindToStack(pReNative, idxVar);
6248
6249 /*
6250 * We have to allocate a register for the variable, even if its a stack one
6251 * as we don't know if there are modification being made to it before its
6252 * finalized (todo: analyze and insert hints about that?).
6253 *
6254 * If we can, we try get the correct register for argument variables. This
6255 * is assuming that most argument variables are fetched as close as possible
6256 * to the actual call, so that there aren't any interfering hidden calls
6257 * (memory accesses, etc) inbetween.
6258 *
6259 * If we cannot or it's a variable, we make sure no argument registers
6260 * that will be used by this MC block will be allocated here, and we always
6261 * prefer non-volatile registers to avoid needing to spill stuff for internal
6262 * call.
6263 */
6264 /** @todo Detect too early argument value fetches and warn about hidden
6265 * calls causing less optimal code to be generated in the python script. */
6266
6267 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6268 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6269 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6270 {
6271 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6272 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6273 }
6274 else
6275 {
6276 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6277 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6278 & ~pReNative->Core.bmHstRegsWithGstShadow
6279 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6280 & fNotArgsMask;
6281 if (fRegs)
6282 {
6283 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6284 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6285 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6286 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6287 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6288 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6289 }
6290 else
6291 {
6292 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6293 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6294 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6295 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6296 }
6297 }
6298 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6299 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6300
6301 /*
6302 * Load it off the stack if we've got a stack slot.
6303 */
6304 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6305 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6306 {
6307 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6308 switch (pReNative->Core.aVars[idxVar].cbVar)
6309 {
6310 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6311 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6312 case 3: AssertFailed(); RT_FALL_THRU();
6313 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6314 default: AssertFailed(); RT_FALL_THRU();
6315 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6316 }
6317 }
6318 else
6319 {
6320 Assert(idxStackSlot == UINT8_MAX);
6321 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6322 }
6323 return idxReg;
6324}
6325
6326
6327/**
6328 * The value of variable @a idxVar will be written in full to the @a enmGstReg
6329 * guest register.
6330 *
6331 * This function makes sure there is a register for it and sets it to be the
6332 * current shadow copy of @a enmGstReg.
6333 *
6334 * @returns The host register number.
6335 * @param pReNative The recompiler state.
6336 * @param idxVar The variable.
6337 * @param enmGstReg The guest register this variable will be written to
6338 * after this call.
6339 * @param poff Pointer to the instruction buffer offset.
6340 * In case a register needs to be freed up or if the
6341 * variable content needs to be loaded off the stack.
6342 *
6343 * @note We DO NOT expect @a idxVar to be an argument variable,
6344 * because we can only in the commit stage of an instruction when this
6345 * function is used.
6346 */
6347DECL_HIDDEN_THROW(uint8_t)
6348iemNativeVarAllocRegisterForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
6349{
6350 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6351 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
6352 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
6353 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
6354 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
6355 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
6356 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6357
6358 /*
6359 * This shouldn't ever be used for arguments, unless it's in a weird else
6360 * branch that doesn't do any calling and even then it's questionable.
6361 *
6362 * However, in case someone writes crazy wrong MC code and does register
6363 * updates before making calls, just use the regular register allocator to
6364 * ensure we get a register suitable for the intended argument number.
6365 */
6366 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarAllocRegister(pReNative, idxVar, poff));
6367
6368 /*
6369 * If there is already a register for the variable, we transfer/set the
6370 * guest shadow copy assignment to it.
6371 */
6372 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6373 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6374 {
6375 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
6376 {
6377 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
6378 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
6379 Log12(("iemNativeVarAllocRegisterForGuestReg: Moved %s for guest %s into %s for full write\n",
6380 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
6381 }
6382 else
6383 {
6384 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
6385 Log12(("iemNativeVarAllocRegisterForGuestReg: Marking %s as copy of guest %s (full write)\n",
6386 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
6387 }
6388 /** @todo figure this one out. We need some way of making sure the register isn't
6389 * modified after this point, just in case we start writing crappy MC code. */
6390 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
6391 return idxReg;
6392 }
6393 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6394
6395 /*
6396 * Because this is supposed to be the commit stage, we're just tag along with the
6397 * temporary register allocator and upgrade it to a variable register.
6398 */
6399 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
6400 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
6401 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
6402 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
6403 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
6404 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6405
6406 /*
6407 * Now we need to load the register value.
6408 */
6409 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
6410 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
6411 else
6412 {
6413 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6414 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6415 switch (pReNative->Core.aVars[idxVar].cbVar)
6416 {
6417 case sizeof(uint64_t):
6418 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
6419 break;
6420 case sizeof(uint32_t):
6421 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
6422 break;
6423 case sizeof(uint16_t):
6424 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
6425 break;
6426 case sizeof(uint8_t):
6427 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
6428 break;
6429 default:
6430 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6431 }
6432 }
6433
6434 return idxReg;
6435}
6436
6437
6438/**
6439 * Sets the host register for @a idxVarRc to @a idxReg.
6440 *
6441 * The register must not be allocated. Any guest register shadowing will be
6442 * implictly dropped by this call.
6443 *
6444 * The variable must not have any register associated with it (causes
6445 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
6446 * implied.
6447 *
6448 * @returns idxReg
6449 * @param pReNative The recompiler state.
6450 * @param idxVar The variable.
6451 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
6452 * @param off For recording in debug info.
6453 *
6454 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
6455 */
6456DECL_INLINE_THROW(uint8_t) iemNativeVarSetRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
6457{
6458 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6459 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6460 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
6461 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
6462
6463 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
6464 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6465
6466 iemNativeVarSetKindToStack(pReNative, idxVar);
6467 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6468
6469 return idxReg;
6470}
6471
6472
6473/**
6474 * Worker that frees the stack slots for variable @a idxVar if any allocated.
6475 *
6476 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
6477 */
6478DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6479{
6480 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6481 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6482 {
6483 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
6484 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
6485 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
6486 Assert(cSlots > 0);
6487 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
6488 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
6489 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
6490 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6491 }
6492 else
6493 Assert(idxStackSlot == UINT8_MAX);
6494}
6495
6496
6497/**
6498 * Worker that frees a single variable.
6499 *
6500 * ASSUMES that @a idxVar is valid.
6501 */
6502DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6503{
6504 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
6505 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
6506
6507 /* Free the host register first if any assigned. */
6508 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6509 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6510 {
6511 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
6512 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
6513 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6514 }
6515
6516 /* Free argument mapping. */
6517 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6518 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
6519 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
6520
6521 /* Free the stack slots. */
6522 iemNativeVarFreeStackSlots(pReNative, idxVar);
6523
6524 /* Free the actual variable. */
6525 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6526 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6527}
6528
6529
6530/**
6531 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
6532 */
6533DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
6534{
6535 while (bmVars != 0)
6536 {
6537 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
6538 bmVars &= ~RT_BIT_32(idxVar);
6539
6540#if 1 /** @todo optimize by simplifying this later... */
6541 iemNativeVarFreeOneWorker(pReNative, idxVar);
6542#else
6543 /* Only need to free the host register, the rest is done as bulk updates below. */
6544 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6545 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6546 {
6547 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
6548 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
6549 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6550 }
6551#endif
6552 }
6553#if 0 /** @todo optimize by simplifying this later... */
6554 pReNative->Core.bmVars = 0;
6555 pReNative->Core.bmStack = 0;
6556 pReNative->Core.u64ArgVars = UINT64_MAX;
6557#endif
6558}
6559
6560
6561/**
6562 * This is called by IEM_MC_END() to clean up all variables.
6563 */
6564DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
6565{
6566 uint32_t const bmVars = pReNative->Core.bmVars;
6567 if (bmVars != 0)
6568 iemNativeVarFreeAllSlow(pReNative, bmVars);
6569 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
6570 Assert(pReNative->Core.bmStack == 0);
6571}
6572
6573
6574#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
6575
6576/**
6577 * This is called by IEM_MC_FREE_LOCAL.
6578 */
6579DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6580{
6581 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6582 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6583 iemNativeVarFreeOneWorker(pReNative, idxVar);
6584}
6585
6586
6587#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
6588
6589/**
6590 * This is called by IEM_MC_FREE_ARG.
6591 */
6592DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6593{
6594 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6595 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
6596 iemNativeVarFreeOneWorker(pReNative, idxVar);
6597}
6598
6599
6600
6601/*********************************************************************************************************************************
6602* Emitters for IEM_MC_CALL_CIMPL_XXX *
6603*********************************************************************************************************************************/
6604
6605/**
6606 * Emits code to load a reference to the given guest register into @a idxGprDst.
6607 */
6608DECL_INLINE_THROW(uint32_t)
6609iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
6610 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
6611{
6612 /*
6613 * Get the offset relative to the CPUMCTX structure.
6614 */
6615 uint32_t offCpumCtx;
6616 switch (enmClass)
6617 {
6618 case kIemNativeGstRegRef_Gpr:
6619 Assert(idxRegInClass < 16);
6620 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
6621 break;
6622
6623 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
6624 Assert(idxRegInClass < 4);
6625 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
6626 break;
6627
6628 case kIemNativeGstRegRef_EFlags:
6629 Assert(idxRegInClass == 0);
6630 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
6631 break;
6632
6633 case kIemNativeGstRegRef_MxCsr:
6634 Assert(idxRegInClass == 0);
6635 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
6636 break;
6637
6638 case kIemNativeGstRegRef_FpuReg:
6639 Assert(idxRegInClass < 8);
6640 AssertFailed(); /** @todo what kind of indexing? */
6641 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
6642 break;
6643
6644 case kIemNativeGstRegRef_MReg:
6645 Assert(idxRegInClass < 8);
6646 AssertFailed(); /** @todo what kind of indexing? */
6647 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
6648 break;
6649
6650 case kIemNativeGstRegRef_XReg:
6651 Assert(idxRegInClass < 16);
6652 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
6653 break;
6654
6655 default:
6656 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
6657 }
6658
6659 /*
6660 * Load the value into the destination register.
6661 */
6662#ifdef RT_ARCH_AMD64
6663 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
6664
6665#elif defined(RT_ARCH_ARM64)
6666 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6667 Assert(offCpumCtx < 4096);
6668 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
6669
6670#else
6671# error "Port me!"
6672#endif
6673
6674 return off;
6675}
6676
6677
6678/**
6679 * Common code for CIMPL and AIMPL calls.
6680 *
6681 * These are calls that uses argument variables and such. They should not be
6682 * confused with internal calls required to implement an MC operation,
6683 * like a TLB load and similar.
6684 *
6685 * Upon return all that is left to do is to load any hidden arguments and
6686 * perform the call. All argument variables are freed.
6687 *
6688 * @returns New code buffer offset; throws VBox status code on error.
6689 * @param pReNative The native recompile state.
6690 * @param off The code buffer offset.
6691 * @param cArgs The total nubmer of arguments (includes hidden
6692 * count).
6693 * @param cHiddenArgs The number of hidden arguments. The hidden
6694 * arguments must not have any variable declared for
6695 * them, whereas all the regular arguments must
6696 * (tstIEMCheckMc ensures this).
6697 */
6698DECL_HIDDEN_THROW(uint32_t)
6699iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
6700{
6701#ifdef VBOX_STRICT
6702 /*
6703 * Assert sanity.
6704 */
6705 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
6706 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
6707 for (unsigned i = 0; i < cHiddenArgs; i++)
6708 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
6709 for (unsigned i = cHiddenArgs; i < cArgs; i++)
6710 {
6711 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
6712 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
6713 }
6714#endif
6715
6716 /*
6717 * Before we do anything else, go over variables that are referenced and
6718 * make sure they are not in a register.
6719 */
6720 uint32_t bmVars = pReNative->Core.bmVars;
6721 if (bmVars)
6722 do
6723 {
6724 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
6725 bmVars &= ~RT_BIT_32(idxVar);
6726
6727 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
6728 {
6729 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
6730 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
6731 {
6732 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6733 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
6734 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
6735 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
6736 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
6737
6738 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6739 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
6740 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
6741 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
6742 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
6743 }
6744 }
6745 } while (bmVars != 0);
6746
6747 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
6748
6749 /*
6750 * First, go over the host registers that will be used for arguments and make
6751 * sure they either hold the desired argument or are free.
6752 */
6753 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
6754 for (uint32_t i = 0; i < cRegArgs; i++)
6755 {
6756 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
6757 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
6758 {
6759 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
6760 {
6761 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
6762 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
6763 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
6764 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6765 if (uArgNo == i)
6766 { /* prefect */ }
6767 /* The variable allocator logic should make sure this is impossible,
6768 except for when the return register is used as a parameter (ARM,
6769 but not x86). */
6770#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
6771 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
6772 {
6773# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
6774# error "Implement this"
6775# endif
6776 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
6777 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
6778 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
6779 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
6780 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
6781 }
6782#endif
6783 else
6784 {
6785 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
6786
6787 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6788 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
6789 else
6790 {
6791 /* just free it, can be reloaded if used again */
6792 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6793 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
6794 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
6795 }
6796 }
6797 }
6798 else
6799 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
6800 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
6801 }
6802 }
6803
6804 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
6805
6806#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
6807 /*
6808 * If there are any stack arguments, make sure they are in their place as well.
6809 *
6810 * We can use IEMNATIVE_CALL_ARG0_GREG as temporary register since we'll (or
6811 * the caller) be loading it later and it must be free (see first loop).
6812 */
6813 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
6814 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
6815 {
6816 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6817 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
6818 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6819 {
6820 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
6821 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
6822 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
6823 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6824 }
6825 else
6826 {
6827 /* Use ARG0 as temp for stuff we need registers for. */
6828 switch (pReNative->Core.aVars[idxVar].enmKind)
6829 {
6830 case kIemNativeVarKind_Stack:
6831 {
6832 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6833 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6834 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
6835 iemNativeStackCalcBpDisp(idxStackSlot));
6836 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6837 continue;
6838 }
6839
6840 case kIemNativeVarKind_Immediate:
6841 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
6842 continue;
6843
6844 case kIemNativeVarKind_VarRef:
6845 {
6846 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
6847 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
6848 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
6849 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
6850 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
6851 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
6852 {
6853 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
6854 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
6855 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
6856 }
6857 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
6858 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
6859 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
6860 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6861 continue;
6862 }
6863
6864 case kIemNativeVarKind_GstRegRef:
6865 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
6866 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
6867 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
6868 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6869 continue;
6870
6871 case kIemNativeVarKind_Invalid:
6872 case kIemNativeVarKind_End:
6873 break;
6874 }
6875 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6876 }
6877 }
6878#else
6879 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
6880#endif
6881
6882 /*
6883 * Make sure the argument variables are loaded into their respective registers.
6884 *
6885 * We can optimize this by ASSUMING that any register allocations are for
6886 * registeres that have already been loaded and are ready. The previous step
6887 * saw to that.
6888 */
6889 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
6890 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
6891 {
6892 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
6893 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
6894 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
6895 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
6896 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
6897 else
6898 {
6899 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6900 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6901 {
6902 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6903 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
6904 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
6905 | RT_BIT_32(idxArgReg);
6906 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
6907 }
6908 else
6909 {
6910 /* Use ARG0 as temp for stuff we need registers for. */
6911 switch (pReNative->Core.aVars[idxVar].enmKind)
6912 {
6913 case kIemNativeVarKind_Stack:
6914 {
6915 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6916 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6917 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
6918 continue;
6919 }
6920
6921 case kIemNativeVarKind_Immediate:
6922 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
6923 continue;
6924
6925 case kIemNativeVarKind_VarRef:
6926 {
6927 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
6928 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
6929 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
6930 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
6931 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
6932 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
6933 {
6934 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
6935 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
6936 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
6937 }
6938 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
6939 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
6940 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
6941 continue;
6942 }
6943
6944 case kIemNativeVarKind_GstRegRef:
6945 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
6946 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
6947 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
6948 continue;
6949
6950 case kIemNativeVarKind_Invalid:
6951 case kIemNativeVarKind_End:
6952 break;
6953 }
6954 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6955 }
6956 }
6957 }
6958#ifdef VBOX_STRICT
6959 else
6960 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
6961 {
6962 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
6963 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
6964 }
6965#endif
6966
6967 /*
6968 * Free all argument variables (simplified).
6969 * Their lifetime always expires with the call they are for.
6970 */
6971 /** @todo Make the python script check that arguments aren't used after
6972 * IEM_MC_CALL_XXXX. */
6973 /** @todo There is a special with IEM_MC_MEM_MAP_U16_RW and friends requiring
6974 * a IEM_MC_MEM_COMMIT_AND_UNMAP_RW after a AIMPL call typically with
6975 * an argument value. There is also some FPU stuff. */
6976 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
6977 {
6978 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6979 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
6980
6981 /* no need to free registers: */
6982 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
6983 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
6984 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
6985 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
6986 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
6987 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
6988
6989 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
6990 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6991 iemNativeVarFreeStackSlots(pReNative, idxVar);
6992 }
6993 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
6994
6995 /*
6996 * Flush volatile registers as we make the call.
6997 */
6998 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
6999
7000 return off;
7001}
7002
7003
7004/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
7005DECL_HIDDEN_THROW(uint32_t)
7006iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
7007 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
7008
7009{
7010 /*
7011 * Do all the call setup and cleanup.
7012 */
7013 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
7014
7015 /*
7016 * Load the two or three hidden arguments.
7017 */
7018#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7019 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7020 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7021 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
7022#else
7023 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7024 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
7025#endif
7026
7027 /*
7028 * Make the call and check the return code.
7029 *
7030 * Shadow PC copies are always flushed here, other stuff depends on flags.
7031 * Segment and general purpose registers are explictily flushed via the
7032 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
7033 * macros.
7034 */
7035 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
7036#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7037 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7038#endif
7039 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
7040 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
7041 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
7042 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7043
7044 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7045}
7046
7047
7048#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7049 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
7050
7051/** Emits code for IEM_MC_CALL_CIMPL_1. */
7052DECL_INLINE_THROW(uint32_t)
7053iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7054 uintptr_t pfnCImpl, uint8_t idxArg0)
7055{
7056 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7057 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
7058}
7059
7060
7061#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7062 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
7063
7064/** Emits code for IEM_MC_CALL_CIMPL_2. */
7065DECL_INLINE_THROW(uint32_t)
7066iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7067 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
7068{
7069 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7070 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7071 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
7072}
7073
7074
7075#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7076 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7077 (uintptr_t)a_pfnCImpl, a0, a1, a2)
7078
7079/** Emits code for IEM_MC_CALL_CIMPL_3. */
7080DECL_INLINE_THROW(uint32_t)
7081iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7082 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7083{
7084 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7085 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7086 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7087 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
7088}
7089
7090
7091#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
7092 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7093 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
7094
7095/** Emits code for IEM_MC_CALL_CIMPL_4. */
7096DECL_INLINE_THROW(uint32_t)
7097iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7098 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7099{
7100 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7101 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7102 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7103 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7104 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
7105}
7106
7107
7108#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
7109 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7110 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
7111
7112/** Emits code for IEM_MC_CALL_CIMPL_4. */
7113DECL_INLINE_THROW(uint32_t)
7114iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7115 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
7116{
7117 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7118 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7119 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7120 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7121 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
7122 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
7123}
7124
7125
7126/** Recompiler debugging: Flush guest register shadow copies. */
7127#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
7128
7129
7130
7131/*********************************************************************************************************************************
7132* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
7133*********************************************************************************************************************************/
7134
7135/**
7136 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
7137 */
7138DECL_INLINE_THROW(uint32_t)
7139iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7140 uintptr_t pfnAImpl, uint8_t cArgs)
7141{
7142 if (idxVarRc != UINT8_MAX)
7143 {
7144 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
7145 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
7146 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
7147 }
7148
7149 /*
7150 * Do all the call setup and cleanup.
7151 */
7152 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
7153
7154 /*
7155 * Make the call and update the return code variable if we've got one.
7156 */
7157 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7158 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
7159 {
7160pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
7161 iemNativeVarSetRegister(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
7162 }
7163
7164 return off;
7165}
7166
7167
7168
7169#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
7170 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
7171
7172#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
7173 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
7174
7175/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
7176DECL_INLINE_THROW(uint32_t)
7177iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
7178{
7179 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
7180}
7181
7182
7183#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
7184 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
7185
7186#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
7187 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
7188
7189/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
7190DECL_INLINE_THROW(uint32_t)
7191iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
7192{
7193 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7194 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
7195}
7196
7197
7198#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
7199 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
7200
7201#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
7202 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
7203
7204/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
7205DECL_INLINE_THROW(uint32_t)
7206iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7207 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
7208{
7209 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7210 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7211 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
7212}
7213
7214
7215#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
7216 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
7217
7218#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
7219 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
7220
7221/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
7222DECL_INLINE_THROW(uint32_t)
7223iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7224 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7225{
7226 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7227 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7228 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7229 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
7230}
7231
7232
7233#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
7234 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7235
7236#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
7237 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7238
7239/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
7240DECL_INLINE_THROW(uint32_t)
7241iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7242 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7243{
7244 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7246 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7247 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
7248 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
7249}
7250
7251
7252
7253/*********************************************************************************************************************************
7254* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
7255*********************************************************************************************************************************/
7256
7257#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
7258 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
7259
7260#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7261 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
7262
7263#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7264 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
7265
7266#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7267 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
7268
7269
7270/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
7271 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
7272DECL_INLINE_THROW(uint32_t)
7273iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
7274{
7275 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7276 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7277 Assert(iGRegEx < 20);
7278
7279 /* Same discussion as in iemNativeEmitFetchGregU16 */
7280 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7281 kIemNativeGstRegUse_ReadOnly);
7282
7283 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7284 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7285
7286 /* The value is zero-extended to the full 64-bit host register width. */
7287 if (iGRegEx < 16)
7288 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7289 else
7290 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7291
7292 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7293 return off;
7294}
7295
7296
7297#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7298 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
7299
7300#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7301 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
7302
7303#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7304 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
7305
7306/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
7307DECL_INLINE_THROW(uint32_t)
7308iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
7309{
7310 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7311 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7312 Assert(iGRegEx < 20);
7313
7314 /* Same discussion as in iemNativeEmitFetchGregU16 */
7315 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7316 kIemNativeGstRegUse_ReadOnly);
7317
7318 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7319 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7320
7321 if (iGRegEx < 16)
7322 {
7323 switch (cbSignExtended)
7324 {
7325 case sizeof(uint16_t):
7326 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7327 break;
7328 case sizeof(uint32_t):
7329 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7330 break;
7331 case sizeof(uint64_t):
7332 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7333 break;
7334 default: AssertFailed(); break;
7335 }
7336 }
7337 else
7338 {
7339 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7340 switch (cbSignExtended)
7341 {
7342 case sizeof(uint16_t):
7343 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7344 break;
7345 case sizeof(uint32_t):
7346 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7347 break;
7348 case sizeof(uint64_t):
7349 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7350 break;
7351 default: AssertFailed(); break;
7352 }
7353 }
7354
7355 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7356 return off;
7357}
7358
7359
7360
7361#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
7362 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
7363
7364#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
7365 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7366
7367#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
7368 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7369
7370/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
7371DECL_INLINE_THROW(uint32_t)
7372iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7373{
7374 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7375 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7376 Assert(iGReg < 16);
7377
7378 /*
7379 * We can either just load the low 16-bit of the GPR into a host register
7380 * for the variable, or we can do so via a shadow copy host register. The
7381 * latter will avoid having to reload it if it's being stored later, but
7382 * will waste a host register if it isn't touched again. Since we don't
7383 * know what going to happen, we choose the latter for now.
7384 */
7385 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7386 kIemNativeGstRegUse_ReadOnly);
7387
7388 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7389 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7390 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7391
7392 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7393 return off;
7394}
7395
7396
7397#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
7398 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7399
7400#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
7401 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7402
7403/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
7404DECL_INLINE_THROW(uint32_t)
7405iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
7406{
7407 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7408 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7409 Assert(iGReg < 16);
7410
7411 /*
7412 * We can either just load the low 16-bit of the GPR into a host register
7413 * for the variable, or we can do so via a shadow copy host register. The
7414 * latter will avoid having to reload it if it's being stored later, but
7415 * will waste a host register if it isn't touched again. Since we don't
7416 * know what going to happen, we choose the latter for now.
7417 */
7418 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7419 kIemNativeGstRegUse_ReadOnly);
7420
7421 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7422 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7423 if (cbSignExtended == sizeof(uint32_t))
7424 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7425 else
7426 {
7427 Assert(cbSignExtended == sizeof(uint64_t));
7428 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7429 }
7430
7431 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7432 return off;
7433}
7434
7435
7436#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
7437 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
7438
7439#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
7440 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
7441
7442/** Emits code for IEM_MC_FETCH_GREG_U32. */
7443DECL_INLINE_THROW(uint32_t)
7444iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7445{
7446 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7447 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
7448 Assert(iGReg < 16);
7449
7450 /*
7451 * We can either just load the low 16-bit of the GPR into a host register
7452 * for the variable, or we can do so via a shadow copy host register. The
7453 * latter will avoid having to reload it if it's being stored later, but
7454 * will waste a host register if it isn't touched again. Since we don't
7455 * know what going to happen, we choose the latter for now.
7456 */
7457 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7458 kIemNativeGstRegUse_ReadOnly);
7459
7460 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7461 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7462 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
7463
7464 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7465 return off;
7466}
7467
7468
7469#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
7470 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
7471
7472/** Emits code for IEM_MC_FETCH_GREG_U32. */
7473DECL_INLINE_THROW(uint32_t)
7474iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
7475{
7476 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
7477 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
7478 Assert(iGReg < 16);
7479
7480 /*
7481 * We can either just load the low 16-bit of the GPR into a host register
7482 * for the variable, or we can do so via a shadow copy host register. The
7483 * latter will avoid having to reload it if it's being stored later, but
7484 * will waste a host register if it isn't touched again. Since we don't
7485 * know what going to happen, we choose the latter for now.
7486 */
7487 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7488 kIemNativeGstRegUse_ReadOnly);
7489
7490 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7491 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
7492 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
7493
7494 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7495 return off;
7496}
7497
7498
7499
7500/*********************************************************************************************************************************
7501* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
7502*********************************************************************************************************************************/
7503
7504#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
7505 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
7506
7507/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
7508DECL_INLINE_THROW(uint32_t)
7509iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
7510{
7511 Assert(iGRegEx < 20);
7512 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7513 kIemNativeGstRegUse_ForUpdate);
7514#ifdef RT_ARCH_AMD64
7515 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
7516
7517 /* To the lowest byte of the register: mov r8, imm8 */
7518 if (iGRegEx < 16)
7519 {
7520 if (idxGstTmpReg >= 8)
7521 pbCodeBuf[off++] = X86_OP_REX_B;
7522 else if (idxGstTmpReg >= 4)
7523 pbCodeBuf[off++] = X86_OP_REX;
7524 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
7525 pbCodeBuf[off++] = u8Value;
7526 }
7527 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
7528 else if (idxGstTmpReg < 4)
7529 {
7530 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
7531 pbCodeBuf[off++] = u8Value;
7532 }
7533 else
7534 {
7535 /* ror reg64, 8 */
7536 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
7537 pbCodeBuf[off++] = 0xc1;
7538 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
7539 pbCodeBuf[off++] = 8;
7540
7541 /* mov reg8, imm8 */
7542 if (idxGstTmpReg >= 8)
7543 pbCodeBuf[off++] = X86_OP_REX_B;
7544 else if (idxGstTmpReg >= 4)
7545 pbCodeBuf[off++] = X86_OP_REX;
7546 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
7547 pbCodeBuf[off++] = u8Value;
7548
7549 /* rol reg64, 8 */
7550 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
7551 pbCodeBuf[off++] = 0xc1;
7552 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
7553 pbCodeBuf[off++] = 8;
7554 }
7555
7556#elif defined(RT_ARCH_ARM64)
7557 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
7558 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7559 if (iGRegEx < 16)
7560 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
7561 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
7562 else
7563 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
7564 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
7565 iemNativeRegFreeTmp(pReNative, idxImmReg);
7566
7567#else
7568# error "Port me!"
7569#endif
7570
7571 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7572
7573 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
7574
7575 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7576 return off;
7577}
7578
7579
7580#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
7581 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
7582
7583/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
7584DECL_INLINE_THROW(uint32_t)
7585iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
7586{
7587 Assert(iGRegEx < 20);
7588 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
7589
7590 /*
7591 * If it's a constant value (unlikely) we treat this as a
7592 * IEM_MC_STORE_GREG_U8_CONST statement.
7593 */
7594 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
7595 { /* likely */ }
7596 else
7597 {
7598 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
7599 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7600 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
7601 }
7602
7603 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7604 kIemNativeGstRegUse_ForUpdate);
7605 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off, true /*fInitialized*/);
7606
7607#ifdef RT_ARCH_AMD64
7608 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
7609 if (iGRegEx < 16)
7610 {
7611 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7612 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
7613 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
7614 else if (idxGstTmpReg >= 4)
7615 pbCodeBuf[off++] = X86_OP_REX;
7616 pbCodeBuf[off++] = 0x8a;
7617 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
7618 }
7619 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
7620 else if (idxGstTmpReg < 4 && idxVarReg < 4)
7621 {
7622 /** @todo test this. */
7623 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
7624 pbCodeBuf[off++] = 0x8a;
7625 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
7626 }
7627 else
7628 {
7629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
7630
7631 /* ror reg64, 8 */
7632 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
7633 pbCodeBuf[off++] = 0xc1;
7634 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
7635 pbCodeBuf[off++] = 8;
7636
7637 /* mov reg8, reg8(r/m) */
7638 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
7639 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
7640 else if (idxGstTmpReg >= 4)
7641 pbCodeBuf[off++] = X86_OP_REX;
7642 pbCodeBuf[off++] = 0x8a;
7643 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
7644
7645 /* rol reg64, 8 */
7646 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
7647 pbCodeBuf[off++] = 0xc1;
7648 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
7649 pbCodeBuf[off++] = 8;
7650 }
7651
7652#elif defined(RT_ARCH_ARM64)
7653 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
7654 or
7655 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
7656 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7657 if (iGRegEx < 16)
7658 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
7659 else
7660 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
7661
7662#else
7663# error "Port me!"
7664#endif
7665
7666 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7667
7668 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
7669 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7670 return off;
7671}
7672
7673
7674
7675#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
7676 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
7677
7678/** Emits code for IEM_MC_STORE_GREG_U16. */
7679DECL_INLINE_THROW(uint32_t)
7680iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
7681{
7682 Assert(iGReg < 16);
7683 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7684 kIemNativeGstRegUse_ForUpdate);
7685#ifdef RT_ARCH_AMD64
7686 /* mov reg16, imm16 */
7687 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7688 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7689 if (idxGstTmpReg >= 8)
7690 pbCodeBuf[off++] = X86_OP_REX_B;
7691 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
7692 pbCodeBuf[off++] = RT_BYTE1(uValue);
7693 pbCodeBuf[off++] = RT_BYTE2(uValue);
7694
7695#elif defined(RT_ARCH_ARM64)
7696 /* movk xdst, #uValue, lsl #0 */
7697 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7698 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
7699
7700#else
7701# error "Port me!"
7702#endif
7703
7704 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7705
7706 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7707 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7708 return off;
7709}
7710
7711
7712#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
7713 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
7714
7715/** Emits code for IEM_MC_STORE_GREG_U16. */
7716DECL_INLINE_THROW(uint32_t)
7717iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
7718{
7719 Assert(iGReg < 16);
7720 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
7721
7722 /*
7723 * If it's a constant value (unlikely) we treat this as a
7724 * IEM_MC_STORE_GREG_U16_CONST statement.
7725 */
7726 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
7727 { /* likely */ }
7728 else
7729 {
7730 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
7731 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7732 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
7733 }
7734
7735 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7736 kIemNativeGstRegUse_ForUpdate);
7737
7738#ifdef RT_ARCH_AMD64
7739 /* mov reg16, reg16 or [mem16] */
7740 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
7741 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7742 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7743 {
7744 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
7745 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
7746 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
7747 pbCodeBuf[off++] = 0x8b;
7748 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
7749 }
7750 else
7751 {
7752 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
7753 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7754 if (idxGstTmpReg >= 8)
7755 pbCodeBuf[off++] = X86_OP_REX_R;
7756 pbCodeBuf[off++] = 0x8b;
7757 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7758 }
7759
7760#elif defined(RT_ARCH_ARM64)
7761 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
7762 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off, true /*fInitialized*/);
7763 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7764 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
7765
7766#else
7767# error "Port me!"
7768#endif
7769
7770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7771
7772 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7773 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7774 return off;
7775}
7776
7777
7778#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
7779 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
7780
7781/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
7782DECL_INLINE_THROW(uint32_t)
7783iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
7784{
7785 Assert(iGReg < 16);
7786 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7787 kIemNativeGstRegUse_ForFullWrite);
7788 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
7789 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7790 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7791 return off;
7792}
7793
7794
7795#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
7796 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
7797
7798/** Emits code for IEM_MC_STORE_GREG_U32. */
7799DECL_INLINE_THROW(uint32_t)
7800iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
7801{
7802 Assert(iGReg < 16);
7803 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
7804
7805 /*
7806 * If it's a constant value (unlikely) we treat this as a
7807 * IEM_MC_STORE_GREG_U32_CONST statement.
7808 */
7809 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
7810 { /* likely */ }
7811 else
7812 {
7813 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
7814 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7815 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
7816 }
7817
7818 /*
7819 * For the rest we allocate a guest register for the variable and writes
7820 * it to the CPUMCTX structure.
7821 */
7822 uint8_t const idxVarReg = iemNativeVarAllocRegisterForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
7823 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7824#ifdef VBOX_STRICT
7825 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
7826#endif
7827 return off;
7828}
7829
7830
7831
7832#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
7833 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
7834
7835/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
7836DECL_INLINE_THROW(uint32_t)
7837iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
7838{
7839 Assert(iGReg < 16);
7840 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7841 kIemNativeGstRegUse_ForUpdate);
7842 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
7843 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7844 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7845 return off;
7846}
7847
7848
7849/*********************************************************************************************************************************
7850* General purpose register manipulation (add, sub). *
7851*********************************************************************************************************************************/
7852
7853#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
7854 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
7855
7856/** Emits code for IEM_MC_SUB_GREG_U16. */
7857DECL_INLINE_THROW(uint32_t)
7858iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
7859{
7860 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7861 kIemNativeGstRegUse_ForUpdate);
7862
7863#ifdef RT_ARCH_AMD64
7864 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
7865 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7866 if (idxGstTmpReg >= 8)
7867 pbCodeBuf[off++] = X86_OP_REX_B;
7868 if (uSubtrahend)
7869 {
7870 pbCodeBuf[off++] = 0xff; /* dec */
7871 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
7872 }
7873 else
7874 {
7875 pbCodeBuf[off++] = 0x81;
7876 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
7877 pbCodeBuf[off++] = uSubtrahend;
7878 pbCodeBuf[off++] = 0;
7879 }
7880
7881#else
7882 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7883 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7884
7885 /* sub tmp, gstgrp, uSubtrahend */
7886 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
7887
7888 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
7889 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
7890
7891 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7892#endif
7893
7894 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7895
7896 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7897
7898 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7899 return off;
7900}
7901
7902
7903#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
7904 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
7905
7906#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
7907 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
7908
7909/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
7910DECL_INLINE_THROW(uint32_t)
7911iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
7912{
7913 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7914 kIemNativeGstRegUse_ForUpdate);
7915
7916#ifdef RT_ARCH_AMD64
7917 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7918 if (f64Bit)
7919 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
7920 else if (idxGstTmpReg >= 8)
7921 pbCodeBuf[off++] = X86_OP_REX_B;
7922 if (uSubtrahend == 1)
7923 {
7924 /* dec */
7925 pbCodeBuf[off++] = 0xff;
7926 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
7927 }
7928 else if (uSubtrahend < 128)
7929 {
7930 pbCodeBuf[off++] = 0x83; /* sub */
7931 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
7932 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
7933 }
7934 else
7935 {
7936 pbCodeBuf[off++] = 0x81; /* sub */
7937 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
7938 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
7939 pbCodeBuf[off++] = 0;
7940 pbCodeBuf[off++] = 0;
7941 pbCodeBuf[off++] = 0;
7942 }
7943
7944#else
7945 /* sub tmp, gstgrp, uSubtrahend */
7946 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7947 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
7948
7949#endif
7950
7951 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7952
7953 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7954
7955 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7956 return off;
7957}
7958
7959
7960
7961/*********************************************************************************************************************************
7962* EFLAGS *
7963*********************************************************************************************************************************/
7964
7965#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
7966 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
7967
7968/** Handles IEM_MC_FETCH_EFLAGS. */
7969DECL_INLINE_THROW(uint32_t)
7970iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
7971{
7972 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
7973 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
7974
7975 uint8_t const idxReg = iemNativeVarAllocRegister(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
7976 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
7977 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7978}
7979
7980
7981#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
7982 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
7983
7984/** Handles IEM_MC_COMMIT_EFLAGS. */
7985DECL_INLINE_THROW(uint32_t)
7986iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
7987{
7988 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
7989 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
7990
7991 uint8_t const idxReg = iemNativeVarAllocRegister(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
7992
7993#ifdef VBOX_STRICT
7994 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
7995 off = iemNativeEmitJnzToFixed(pReNative, off, 1);
7996 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
7997
7998 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
7999 off = iemNativeEmitJzToFixed(pReNative, off, 1);
8000 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
8001#endif
8002
8003 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8004 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
8005}
8006
8007
8008
8009/*********************************************************************************************************************************
8010* Register references. *
8011*********************************************************************************************************************************/
8012
8013#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
8014 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
8015
8016#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGReg) \
8017 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
8018
8019/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
8020DECL_INLINE_THROW(uint32_t)
8021iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
8022{
8023 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8024 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8025 Assert(iGRegEx < 20);
8026
8027 if (iGRegEx < 16)
8028 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8029 else
8030 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
8031
8032 /* If we've delayed writing back the register value, flush it now. */
8033 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8034
8035 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8036 if (!fConst)
8037 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
8038
8039 return off;
8040}
8041
8042#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
8043 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
8044
8045#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
8046 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
8047
8048#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
8049 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
8050
8051#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
8052 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
8053
8054#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
8055 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
8056
8057#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
8058 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
8059
8060#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
8061 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
8062
8063#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
8064 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
8065
8066#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
8067 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
8068
8069#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
8070 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
8071
8072/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
8073DECL_INLINE_THROW(uint32_t)
8074iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
8075{
8076 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8077 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8078 Assert(iGReg < 16);
8079
8080 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
8081
8082 /* If we've delayed writing back the register value, flush it now. */
8083 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
8084
8085 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8086 if (!fConst)
8087 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
8088
8089 return off;
8090}
8091
8092
8093#define IEM_MC_REF_EFLAGS(a_pEFlags) \
8094 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
8095
8096/** Handles IEM_MC_REF_EFLAGS. */
8097DECL_INLINE_THROW(uint32_t)
8098iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
8099{
8100 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8101 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8102
8103 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
8104
8105 /* If we've delayed writing back the register value, flush it now. */
8106 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
8107
8108 /* If there is a shadow copy of guest EFLAGS, flush it now. */
8109 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
8110
8111 return off;
8112}
8113
8114
8115/*********************************************************************************************************************************
8116* Effective Address Calculation *
8117*********************************************************************************************************************************/
8118#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
8119 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
8120
8121/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
8122 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
8123DECL_INLINE_THROW(uint32_t)
8124iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8125 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
8126{
8127 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
8128
8129 /*
8130 * Handle the disp16 form with no registers first.
8131 *
8132 * Convert to an immediate value, as that'll delay the register allocation
8133 * and assignment till the memory access / call / whatever and we can use
8134 * a more appropriate register (or none at all).
8135 */
8136 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
8137 {
8138 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
8139 return off;
8140 }
8141
8142 /* Determin the displacment. */
8143 uint16_t u16EffAddr;
8144 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
8145 {
8146 case 0: u16EffAddr = 0; break;
8147 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
8148 case 2: u16EffAddr = u16Disp; break;
8149 default: AssertFailedStmt(u16EffAddr = 0);
8150 }
8151
8152 /* Determine the registers involved. */
8153 uint8_t idxGstRegBase;
8154 uint8_t idxGstRegIndex;
8155 switch (bRm & X86_MODRM_RM_MASK)
8156 {
8157 case 0:
8158 idxGstRegBase = X86_GREG_xBX;
8159 idxGstRegIndex = X86_GREG_xSI;
8160 break;
8161 case 1:
8162 idxGstRegBase = X86_GREG_xBX;
8163 idxGstRegIndex = X86_GREG_xDI;
8164 break;
8165 case 2:
8166 idxGstRegBase = X86_GREG_xBP;
8167 idxGstRegIndex = X86_GREG_xSI;
8168 break;
8169 case 3:
8170 idxGstRegBase = X86_GREG_xBP;
8171 idxGstRegIndex = X86_GREG_xDI;
8172 break;
8173 case 4:
8174 idxGstRegBase = X86_GREG_xSI;
8175 idxGstRegIndex = UINT8_MAX;
8176 break;
8177 case 5:
8178 idxGstRegBase = X86_GREG_xDI;
8179 idxGstRegIndex = UINT8_MAX;
8180 break;
8181 case 6:
8182 idxGstRegBase = X86_GREG_xBP;
8183 idxGstRegIndex = UINT8_MAX;
8184 break;
8185#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
8186 default:
8187#endif
8188 case 7:
8189 idxGstRegBase = X86_GREG_xBX;
8190 idxGstRegIndex = UINT8_MAX;
8191 break;
8192 }
8193
8194 /*
8195 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
8196 */
8197 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
8198 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
8199 kIemNativeGstRegUse_ReadOnly);
8200 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
8201 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
8202 kIemNativeGstRegUse_ReadOnly)
8203 : UINT8_MAX;
8204#ifdef RT_ARCH_AMD64
8205 if (idxRegIndex == UINT8_MAX)
8206 {
8207 if (u16EffAddr == 0)
8208 {
8209 /* movxz ret, base */
8210 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
8211 }
8212 else
8213 {
8214 /* lea ret32, [base64 + disp32] */
8215 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
8216 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8217 if (idxRegRet >= 8 || idxRegBase >= 8)
8218 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
8219 pbCodeBuf[off++] = 0x8d;
8220 if (idxRegBase != X86_GREG_x12 /*SIB*/)
8221 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
8222 else
8223 {
8224 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
8225 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
8226 }
8227 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
8228 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
8229 pbCodeBuf[off++] = 0;
8230 pbCodeBuf[off++] = 0;
8231 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8232
8233 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
8234 }
8235 }
8236 else
8237 {
8238 /* lea ret32, [index64 + base64 (+ disp32)] */
8239 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
8240 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8241 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
8242 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
8243 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
8244 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
8245 pbCodeBuf[off++] = 0x8d;
8246 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
8247 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
8248 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
8249 if (bMod == X86_MOD_MEM4)
8250 {
8251 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
8252 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
8253 pbCodeBuf[off++] = 0;
8254 pbCodeBuf[off++] = 0;
8255 }
8256 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8257 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
8258 }
8259
8260#elif defined(RT_ARCH_ARM64)
8261 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8262 if (u16EffAddr == 0)
8263 {
8264 if (idxRegIndex == UINT8_MAX)
8265 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
8266 else
8267 {
8268 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
8269 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
8270 }
8271 }
8272 else
8273 {
8274 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
8275 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
8276 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
8277 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
8278 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
8279 else
8280 {
8281 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
8282 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
8283 }
8284 if (idxRegIndex != UINT8_MAX)
8285 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
8286 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
8287 }
8288
8289#else
8290# error "port me"
8291#endif
8292
8293 if (idxRegIndex != UINT8_MAX)
8294 iemNativeRegFreeTmp(pReNative, idxRegIndex);
8295 iemNativeRegFreeTmp(pReNative, idxRegBase);
8296 return off;
8297}
8298
8299
8300#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
8301 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
8302
8303/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
8304 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
8305DECL_INLINE_THROW(uint32_t)
8306iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8307 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
8308{
8309 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
8310
8311 /*
8312 * Handle the disp32 form with no registers first.
8313 *
8314 * Convert to an immediate value, as that'll delay the register allocation
8315 * and assignment till the memory access / call / whatever and we can use
8316 * a more appropriate register (or none at all).
8317 */
8318 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
8319 {
8320 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
8321 return off;
8322 }
8323
8324 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
8325 uint32_t u32EffAddr = 0;
8326 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
8327 {
8328 case 0: break;
8329 case 1: u32EffAddr = (int8_t)u32Disp; break;
8330 case 2: u32EffAddr = u32Disp; break;
8331 default: AssertFailed();
8332 }
8333
8334 /* Get the register (or SIB) value. */
8335 uint8_t idxGstRegBase = UINT8_MAX;
8336 uint8_t idxGstRegIndex = UINT8_MAX;
8337 uint8_t cShiftIndex = 0;
8338 switch (bRm & X86_MODRM_RM_MASK)
8339 {
8340 case 0: idxGstRegBase = X86_GREG_xAX; break;
8341 case 1: idxGstRegBase = X86_GREG_xCX; break;
8342 case 2: idxGstRegBase = X86_GREG_xDX; break;
8343 case 3: idxGstRegBase = X86_GREG_xBX; break;
8344 case 4: /* SIB */
8345 {
8346 /* index /w scaling . */
8347 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
8348 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
8349 {
8350 case 0: idxGstRegIndex = X86_GREG_xAX; break;
8351 case 1: idxGstRegIndex = X86_GREG_xCX; break;
8352 case 2: idxGstRegIndex = X86_GREG_xDX; break;
8353 case 3: idxGstRegIndex = X86_GREG_xBX; break;
8354 case 4: cShiftIndex = 0; /*no index*/ break;
8355 case 5: idxGstRegIndex = X86_GREG_xBP; break;
8356 case 6: idxGstRegIndex = X86_GREG_xSI; break;
8357 case 7: idxGstRegIndex = X86_GREG_xDI; break;
8358 }
8359
8360 /* base */
8361 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
8362 {
8363 case 0: idxGstRegBase = X86_GREG_xAX; break;
8364 case 1: idxGstRegBase = X86_GREG_xCX; break;
8365 case 2: idxGstRegBase = X86_GREG_xDX; break;
8366 case 3: idxGstRegBase = X86_GREG_xBX; break;
8367 case 4:
8368 idxGstRegBase = X86_GREG_xSP;
8369 u32EffAddr += uSibAndRspOffset >> 8;
8370 break;
8371 case 5:
8372 if ((bRm & X86_MODRM_MOD_MASK) != 0)
8373 idxGstRegBase = X86_GREG_xBP;
8374 else
8375 {
8376 Assert(u32EffAddr == 0);
8377 u32EffAddr = u32Disp;
8378 }
8379 break;
8380 case 6: idxGstRegBase = X86_GREG_xSI; break;
8381 case 7: idxGstRegBase = X86_GREG_xDI; break;
8382 }
8383 break;
8384 }
8385 case 5: idxGstRegBase = X86_GREG_xBP; break;
8386 case 6: idxGstRegBase = X86_GREG_xSI; break;
8387 case 7: idxGstRegBase = X86_GREG_xDI; break;
8388 }
8389
8390 /*
8391 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
8392 * the start of the function.
8393 */
8394 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
8395 {
8396 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
8397 return off;
8398 }
8399
8400 /*
8401 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
8402 */
8403 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
8404 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
8405 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
8406 kIemNativeGstRegUse_ReadOnly);
8407 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
8408 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
8409 kIemNativeGstRegUse_ReadOnly);
8410
8411 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
8412 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
8413 {
8414 idxRegBase = idxRegIndex;
8415 idxRegIndex = UINT8_MAX;
8416 }
8417
8418#ifdef RT_ARCH_AMD64
8419 if (idxRegIndex == UINT8_MAX)
8420 {
8421 if (u32EffAddr == 0)
8422 {
8423 /* mov ret, base */
8424 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
8425 }
8426 else
8427 {
8428 /* lea ret32, [base64 + disp32] */
8429 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
8430 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8431 if (idxRegRet >= 8 || idxRegBase >= 8)
8432 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
8433 pbCodeBuf[off++] = 0x8d;
8434 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
8435 if (idxRegBase != X86_GREG_x12 /*SIB*/)
8436 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
8437 else
8438 {
8439 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
8440 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
8441 }
8442 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
8443 if (bMod == X86_MOD_MEM4)
8444 {
8445 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
8446 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
8447 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
8448 }
8449 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8450 }
8451 }
8452 else
8453 {
8454 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
8455 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8456 if (idxRegBase == UINT8_MAX)
8457 {
8458 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
8459 if (idxRegRet >= 8 || idxRegIndex >= 8)
8460 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
8461 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
8462 pbCodeBuf[off++] = 0x8d;
8463 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
8464 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
8465 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
8466 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
8467 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
8468 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
8469 }
8470 else
8471 {
8472 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
8473 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
8474 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
8475 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
8476 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
8477 pbCodeBuf[off++] = 0x8d;
8478 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
8479 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
8480 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
8481 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
8482 if (bMod != X86_MOD_MEM0)
8483 {
8484 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
8485 if (bMod == X86_MOD_MEM4)
8486 {
8487 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
8488 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
8489 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
8490 }
8491 }
8492 }
8493 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8494 }
8495
8496#elif defined(RT_ARCH_ARM64)
8497 if (u32EffAddr == 0)
8498 {
8499 if (idxRegIndex == UINT8_MAX)
8500 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
8501 else if (idxRegBase == UINT8_MAX)
8502 {
8503 if (cShiftIndex == 0)
8504 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
8505 else
8506 {
8507 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8508 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
8509 }
8510 }
8511 else
8512 {
8513 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8514 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
8515 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
8516 }
8517 }
8518 else
8519 {
8520 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
8521 {
8522 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8523 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
8524 }
8525 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
8526 {
8527 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8528 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
8529 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
8530 }
8531 else
8532 {
8533 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
8534 if (idxRegBase != UINT8_MAX)
8535 {
8536 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8537 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
8538 }
8539 }
8540 if (idxRegIndex != UINT8_MAX)
8541 {
8542 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8543 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
8544 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
8545 }
8546 }
8547
8548#else
8549# error "port me"
8550#endif
8551
8552 if (idxRegIndex != UINT8_MAX)
8553 iemNativeRegFreeTmp(pReNative, idxRegIndex);
8554 if (idxRegBase != UINT8_MAX)
8555 iemNativeRegFreeTmp(pReNative, idxRegBase);
8556 return off;
8557}
8558
8559
8560#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
8561 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff)
8562
8563#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
8564 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 64)
8565
8566#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
8567 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 32)
8568
8569
8570
8571/*********************************************************************************************************************************
8572* Memory fetches and stores common *
8573*********************************************************************************************************************************/
8574
8575typedef enum IEMNATIVEMITMEMOP
8576{
8577 kIemNativeEmitMemOp_Store = 0,
8578 kIemNativeEmitMemOp_Fetch,
8579 kIemNativeEmitMemOp_Fetch_Zx_U16,
8580 kIemNativeEmitMemOp_Fetch_Zx_U32,
8581 kIemNativeEmitMemOp_Fetch_Zx_U64,
8582 kIemNativeEmitMemOp_Fetch_Sx_U16,
8583 kIemNativeEmitMemOp_Fetch_Sx_U32,
8584 kIemNativeEmitMemOp_Fetch_Sx_U64
8585} IEMNATIVEMITMEMOP;
8586
8587/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
8588 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
8589 * (with iSegReg = UINT8_MAX). */
8590DECL_INLINE_THROW(uint32_t)
8591iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
8592 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
8593 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
8594{
8595 /*
8596 * Assert sanity.
8597 */
8598 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8599 Assert( enmOp != kIemNativeEmitMemOp_Store
8600 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
8601 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
8602 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8603 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
8604 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
8605 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8606 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8607 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
8608 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8609#ifdef VBOX_STRICT
8610 if (iSegReg == UINT8_MAX)
8611 {
8612 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8613 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8614 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8615 switch (cbMem)
8616 {
8617 case 1:
8618 Assert( pfnFunction
8619 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
8620 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
8621 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
8622 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
8623 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
8624 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
8625 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
8626 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
8627 : UINT64_C(0xc000b000a0009000) ));
8628 break;
8629 case 2:
8630 Assert( pfnFunction
8631 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
8632 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
8633 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
8634 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
8635 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
8636 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
8637 : UINT64_C(0xc000b000a0009000) ));
8638 break;
8639 case 4:
8640 Assert( pfnFunction
8641 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
8642 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
8643 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
8644 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
8645 : UINT64_C(0xc000b000a0009000) ));
8646 break;
8647 case 8:
8648 Assert( pfnFunction
8649 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
8650 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
8651 : UINT64_C(0xc000b000a0009000) ));
8652 break;
8653 }
8654 }
8655 else
8656 {
8657 Assert(iSegReg < 6);
8658 switch (cbMem)
8659 {
8660 case 1:
8661 Assert( pfnFunction
8662 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
8663 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
8664 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
8665 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
8666 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
8667 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
8668 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
8669 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
8670 : UINT64_C(0xc000b000a0009000) ));
8671 break;
8672 case 2:
8673 Assert( pfnFunction
8674 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
8675 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
8676 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
8677 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
8678 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
8679 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
8680 : UINT64_C(0xc000b000a0009000) ));
8681 break;
8682 case 4:
8683 Assert( pfnFunction
8684 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
8685 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
8686 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
8687 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
8688 : UINT64_C(0xc000b000a0009000) ));
8689 break;
8690 case 8:
8691 Assert( pfnFunction
8692 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
8693 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
8694 : UINT64_C(0xc000b000a0009000) ));
8695 break;
8696 }
8697 }
8698#endif
8699
8700#ifdef VBOX_STRICT
8701 /*
8702 * Check that the fExec flags we've got make sense.
8703 */
8704 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8705#endif
8706
8707 /*
8708 * To keep things simple we have to commit any pending writes first as we
8709 * may end up making calls.
8710 */
8711 /** @todo we could postpone this till we make the call and reload the
8712 * registers after returning from the call. Not sure if that's sensible or
8713 * not, though. */
8714 off = iemNativeRegFlushPendingWrites(pReNative, off);
8715
8716 /*
8717 * Move/spill/flush stuff out of call-volatile registers.
8718 * This is the easy way out. We could contain this to the tlb-miss branch
8719 * by saving and restoring active stuff here.
8720 */
8721 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8722 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8723
8724 /*
8725 * Define labels and allocate the result register (trying for the return
8726 * register if we can).
8727 */
8728 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8729 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8730 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8731 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX /* special case value storing below */
8732 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8733 ? iemNativeVarSetRegister(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, off)
8734 : iemNativeVarAllocRegister(pReNative, idxVarValue, &off);
8735
8736 /*
8737 * First we try to go via the TLB.
8738 */
8739//pReNative->pInstrBuf[off++] = 0xcc;
8740 /** @todo later. */
8741 RT_NOREF(fAlignMask, cbMem);
8742
8743 /*
8744 * Call helper to do the fetching.
8745 * We flush all guest register shadow copies here.
8746 */
8747 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8748
8749#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8750 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8751#else
8752 RT_NOREF(idxInstr);
8753#endif
8754
8755 uint8_t idxRegArgValue;
8756 if (iSegReg == UINT8_MAX)
8757 idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
8758 else
8759 {
8760 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
8761 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8762 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
8763
8764 idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
8765 }
8766
8767 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
8768 if (enmOp == kIemNativeEmitMemOp_Store)
8769 {
8770 if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
8771 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
8772 else
8773 {
8774 uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
8775 if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
8776 {
8777 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
8778 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
8779 }
8780 else
8781 {
8782 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
8783 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8784 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
8785 }
8786 }
8787 }
8788
8789 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
8790 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
8791 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
8792 pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
8793 else
8794 {
8795 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
8796 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
8797 {
8798 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
8799 if (!offDisp)
8800 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
8801 else
8802 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
8803 }
8804 else
8805 {
8806 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
8807 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8808 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
8809 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
8810 if (offDisp)
8811 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
8812 }
8813 }
8814
8815 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8816 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8817
8818 /* Done setting up parameters, make the call. */
8819 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8820
8821 /*
8822 * Put the result in the right register if this is a fetch.
8823 */
8824 if (enmOp != kIemNativeEmitMemOp_Store)
8825 {
8826 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
8827 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
8828 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
8829 }
8830
8831 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8832
8833 return off;
8834}
8835
8836
8837
8838/*********************************************************************************************************************************
8839* Memory fetches (IEM_MEM_FETCH_XXX). *
8840*********************************************************************************************************************************/
8841
8842/* 8-bit segmented: */
8843#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
8844 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
8845 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
8846 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
8847
8848#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
8849 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
8850 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
8851 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
8852
8853#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
8854 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
8855 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
8856 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
8857
8858#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
8859 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
8860 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8861 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
8862
8863#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
8864 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
8865 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
8866 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
8867
8868#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
8869 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
8870 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8871 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
8872
8873#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
8874 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
8875 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
8876 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
8877
8878/* 16-bit segmented: */
8879#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
8880 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
8881 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
8882 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
8883
8884#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
8885 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
8886 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
8887 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
8888
8889#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
8890 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
8891 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
8892 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
8893
8894#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
8895 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
8896 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8897 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
8898
8899#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
8900 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
8901 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8902 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
8903
8904#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
8905 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
8906 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
8907 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
8908
8909
8910/* 32-bit segmented: */
8911#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
8912 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
8913 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8914 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
8915
8916#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
8917 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
8918 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8919 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
8920
8921#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
8922 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
8923 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8924 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
8925
8926#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
8927 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
8928 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
8929 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
8930
8931
8932/* 64-bit segmented: */
8933#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
8934 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
8935 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
8936 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
8937
8938
8939
8940/* 8-bit flat: */
8941#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
8942 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
8943 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
8944 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
8945
8946#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
8947 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
8948 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
8949 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
8950
8951#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
8952 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8953 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
8954 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
8955
8956#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
8957 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8958 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8959 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
8960
8961#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
8962 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
8963 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
8964 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
8965
8966#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
8967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8968 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8969 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
8970
8971#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
8972 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8973 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
8974 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
8975
8976
8977/* 16-bit flat: */
8978#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
8979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
8980 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
8981 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
8982
8983#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
8984 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
8985 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
8986 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
8987
8988#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
8989 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8990 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
8991 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
8992
8993#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
8994 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8995 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8996 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
8997
8998#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
8999 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9000 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
9001 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
9002
9003#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
9004 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9005 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9006 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
9007
9008/* 32-bit flat: */
9009#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
9010 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9011 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
9012 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
9013
9014#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
9015 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
9016 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
9017 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
9018
9019#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
9020 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9021 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
9022 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
9023
9024#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
9025 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9026 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
9027 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
9028
9029/* 64-bit flat: */
9030#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
9031 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
9032 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
9033 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
9034
9035
9036
9037/*********************************************************************************************************************************
9038* Memory stores (IEM_MEM_STORE_XXX). *
9039*********************************************************************************************************************************/
9040
9041#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
9042 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
9043 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
9044 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
9045
9046#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
9047 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
9048 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
9049 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
9050
9051#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
9052 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
9053 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
9054 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
9055
9056#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
9057 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
9058 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
9059 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
9060
9061
9062#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
9063 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
9064 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
9065 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
9066
9067#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
9068 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
9069 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
9070 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
9071
9072#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
9073 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
9074 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
9075 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
9076
9077#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
9078 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
9079 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
9080 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
9081
9082
9083#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
9084 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9085 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
9086
9087#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
9088 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9089 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
9090
9091#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
9092 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9093 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
9094
9095#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
9096 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9097 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
9098
9099
9100#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
9101 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9102 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
9103
9104#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
9105 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9106 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
9107
9108#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
9109 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9110 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
9111
9112#define IEM_MC_STORE_MEM_FLAT_U64_CONST(_GCPtrMem, a_u64ConstValue) \
9113 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9114 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
9115
9116/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
9117 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
9118DECL_INLINE_THROW(uint32_t)
9119iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
9120 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
9121{
9122 /*
9123 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
9124 * to do the grunt work.
9125 */
9126 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
9127 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
9128 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
9129 pfnFunction, idxInstr);
9130 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
9131 return off;
9132}
9133
9134
9135
9136/*********************************************************************************************************************************
9137* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
9138*********************************************************************************************************************************/
9139
9140#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9141 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9142 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
9143 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
9144
9145#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9146 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9147 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
9148 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
9149
9150#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9151 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9152 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
9153 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
9154
9155
9156#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9157 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9158 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9159 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
9160
9161#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9162 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9163 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9164 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9165
9166#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9167 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9168 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9169 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
9170
9171#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9172 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
9173 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9174 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9175
9176
9177#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9178 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9179 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9180 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
9181
9182#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9183 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9184 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9185 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9186
9187#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9188 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9189 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9190 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
9191
9192#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9193 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
9194 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9195 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9196
9197
9198#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9199 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9200 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9201 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
9202
9203#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9204 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9205 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9206 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9207
9208#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9209 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9210 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9211 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
9212
9213#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9214 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
9215 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9216 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9217
9218
9219#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9220 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9221 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9222 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
9223
9224#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9225 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9226 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
9227 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
9228
9229
9230#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9231 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9232 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9233 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9234
9235#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9236 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9237 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9238 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9239
9240#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9241 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9242 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9243 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9244
9245
9246
9247#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9248 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9249 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
9250 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9251
9252#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9253 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9254 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
9255 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9256
9257#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9258 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9259 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
9260 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9261
9262
9263#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9264 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9265 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9266 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9267
9268#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9269 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9270 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9271 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9272
9273#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9274 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9275 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9276 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9277
9278#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9279 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9280 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
9281 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9282
9283
9284#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9285 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9286 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9287 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9288
9289#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9290 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9291 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9292 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9293
9294#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9295 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9296 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9297 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9298
9299#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9300 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9301 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
9302 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9303
9304
9305#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9306 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9307 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9308 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9309
9310#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9311 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9312 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9313 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9314
9315#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9316 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9317 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9318 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9319
9320#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9321 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9322 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9323 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9324
9325
9326#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9327 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9328 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
9329 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9330
9331#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9332 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9333 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
9334 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9335
9336
9337#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9338 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9339 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9340 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9341
9342#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9343 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9344 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9345 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9346
9347#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9348 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9349 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
9350 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9351
9352
9353DECL_INLINE_THROW(uint32_t)
9354iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9355 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
9356 uintptr_t pfnFunction, uint8_t idxInstr)
9357{
9358 /*
9359 * Assert sanity.
9360 */
9361 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9362 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
9363 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
9364 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9365
9366 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9367 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
9368 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
9369 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9370
9371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9372 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
9373 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
9374 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9375
9376 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9377
9378 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9379
9380#ifdef VBOX_STRICT
9381# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9382 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9383 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9384 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
9385 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9386
9387 if (iSegReg == UINT8_MAX)
9388 {
9389 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9390 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9391 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9392 switch (cbMem)
9393 {
9394 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
9395 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
9396 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
9397 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
9398 case 10:
9399 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9400 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9401 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9402 break;
9403 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
9404# if 0
9405 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
9406 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
9407# endif
9408 default: AssertFailed(); break;
9409 }
9410 }
9411 else
9412 {
9413 Assert(iSegReg < 6);
9414 switch (cbMem)
9415 {
9416 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
9417 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
9418 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
9419 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
9420 case 10:
9421 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9422 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9423 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9424 break;
9425 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
9426# if 0
9427 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
9428 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
9429# endif
9430 default: AssertFailed(); break;
9431 }
9432 }
9433# undef IEM_MAP_HLP_FN
9434#endif
9435
9436#ifdef VBOX_STRICT
9437 /*
9438 * Check that the fExec flags we've got make sense.
9439 */
9440 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9441#endif
9442
9443 /*
9444 * To keep things simple we have to commit any pending writes first as we
9445 * may end up making calls.
9446 */
9447 /** @todo we could postpone this till we make the call and reload the
9448 * registers after returning from the call. Not sure if that's sensible or
9449 * not, though. */
9450 off = iemNativeRegFlushPendingWrites(pReNative, off);
9451
9452 /*
9453 * Move/spill/flush stuff out of call-volatile registers.
9454 * This is the easy way out. We could contain this to the tlb-miss branch
9455 * by saving and restoring active stuff here.
9456 */
9457 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9458 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9459
9460 /*
9461 * Define labels and allocate the result register (trying for the return
9462 * register if we can - which we of course can, given the above call).
9463 */
9464 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9465 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
9466 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9467 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9468 ? iemNativeVarSetRegister(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, off)
9469 : iemNativeVarAllocRegister(pReNative, idxVarMem, &off);
9470
9471 /*
9472 * First we try to go via the TLB.
9473 */
9474//pReNative->pInstrBuf[off++] = 0xcc;
9475 /** @todo later. */
9476 RT_NOREF(fAccess, fAlignMask, cbMem);
9477
9478 /*
9479 * Call helper to do the fetching.
9480 * We flush all guest register shadow copies here.
9481 */
9482 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
9483
9484#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9485 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9486#else
9487 RT_NOREF(idxInstr);
9488#endif
9489
9490 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9491 if (iSegReg != UINT8_MAX)
9492 {
9493 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9494 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9495 }
9496
9497 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem */
9498 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem);
9499
9500 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo */
9501 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9502 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo, true /*fFlushShadows*/);
9503
9504 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9505 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9506
9507 /* Done setting up parameters, make the call. */
9508 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9509
9510 /*
9511 * Put the result in the right register .
9512 */
9513 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
9514 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9515 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9516
9517 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9518
9519 return off;
9520}
9521
9522
9523#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9524 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
9525 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9526
9527#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9528 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
9529 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9530
9531#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9532 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
9533 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9534
9535DECL_INLINE_THROW(uint32_t)
9536iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9537 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9538{
9539 /*
9540 * Assert sanity.
9541 */
9542 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9543 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
9544 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9545 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9546#ifdef VBOX_STRICT
9547 switch (fAccess & IEM_ACCESS_TYPE_MASK)
9548 {
9549 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9550 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9551 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9552 default: AssertFailed();
9553 }
9554#endif
9555
9556 /*
9557 * To keep things simple we have to commit any pending writes first as we
9558 * may end up making calls (there shouldn't be any at this point, so this
9559 * is just for consistency).
9560 */
9561 /** @todo we could postpone this till we make the call and reload the
9562 * registers after returning from the call. Not sure if that's sensible or
9563 * not, though. */
9564 off = iemNativeRegFlushPendingWrites(pReNative, off);
9565
9566 /*
9567 * Move/spill/flush stuff out of call-volatile registers.
9568 */
9569 /** @todo save+restore active registers and maybe guest shadows in miss
9570 * scenario. */
9571 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9572
9573 /*
9574 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9575 * to call the unmap helper function.
9576 */
9577//pReNative->pInstrBuf[off++] = 0xcc;
9578 RT_NOREF(fAccess);
9579
9580#ifdef RT_ARCH_AMD64
9581 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
9582 {
9583 /* test byte [rbp - xxx], 0ffh */
9584 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9585 pbCodeBuf[off++] = 0xf6;
9586 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
9587 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9588 pbCodeBuf[off++] = 0xff;
9589 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9590 }
9591 else
9592#endif
9593 {
9594 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxVarUnmapInfo, &off);
9595 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9596 }
9597 uint32_t const offJmpFixup = off;
9598 off = iemNativeEmitJzToFixed(pReNative, off, 0);
9599
9600 /*
9601 * Call the unmap helper function.
9602 */
9603#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9604 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9605#else
9606 RT_NOREF(idxInstr);
9607#endif
9608
9609 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo */
9610 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo);
9611
9612 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9613 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9614
9615 /* Done setting up parameters, make the call. */
9616 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9617
9618 /*
9619 * Done, just fixup the jump for the non-call case.
9620 */
9621 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9622
9623 return off;
9624}
9625
9626
9627
9628/*********************************************************************************************************************************
9629* Builtin functions *
9630*********************************************************************************************************************************/
9631
9632/**
9633 * Built-in function that calls a C-implemention function taking zero arguments.
9634 */
9635static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
9636{
9637 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
9638 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
9639 uint64_t const fGstShwFlush = (uint8_t)pCallEntry->auParams[2];
9640 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
9641}
9642
9643
9644/**
9645 * Built-in function that checks for pending interrupts that can be delivered or
9646 * forced action flags.
9647 *
9648 * This triggers after the completion of an instruction, so EIP is already at
9649 * the next instruction. If an IRQ or important FF is pending, this will return
9650 * a non-zero status that stops TB execution.
9651 */
9652static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
9653{
9654 RT_NOREF(pCallEntry);
9655
9656 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
9657 and I'm too lazy to create a 'Fixed' version of that one. */
9658 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
9659 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
9660
9661 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
9662
9663 /* Again, we need to load the extended EFLAGS before we actually need them
9664 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
9665 loaded them inside the check, as the shadow state would not be correct
9666 when the code branches before the load. Ditto PC. */
9667 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
9668 kIemNativeGstRegUse_ReadOnly);
9669
9670 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
9671
9672 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9673
9674 /*
9675 * Start by checking the local forced actions of the EMT we're on for IRQs
9676 * and other FFs that needs servicing.
9677 */
9678 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
9679 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
9680 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
9681 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
9682 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
9683 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
9684 | VMCPU_FF_TLB_FLUSH
9685 | VMCPU_FF_UNHALT ),
9686 true /*fSetFlags*/);
9687 /* If we end up with ZERO in idxTmpReg there is nothing to do.*/
9688 uint32_t const offFixupJumpToVmCheck1 = off;
9689 off = iemNativeEmitJzToFixed(pReNative, off, 0);
9690
9691 /* Some relevant FFs are set, but if's only APIC or/and PIC being set,
9692 these may be supressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
9693 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
9694 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
9695 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
9696 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
9697
9698 /* So, it's only interrupt releated FFs and we need to see if IRQs are being
9699 suppressed by the CPU or not. */
9700 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
9701 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
9702 idxLabelReturnBreak);
9703
9704 /* We've got shadow flags set, so we must check that the PC they are valid
9705 for matches our current PC value. */
9706 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
9707 * a register. */
9708 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
9709 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
9710
9711 /*
9712 * Now check the force flags of the VM.
9713 */
9714 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
9715 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
9716 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
9717 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
9718 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
9719 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
9720
9721 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
9722
9723 /*
9724 * We're good, no IRQs or FFs pending.
9725 */
9726 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9727 iemNativeRegFreeTmp(pReNative, idxEflReg);
9728 iemNativeRegFreeTmp(pReNative, idxPcReg);
9729
9730 return off;
9731}
9732
9733
9734/**
9735 * Built-in function checks if IEMCPU::fExec has the expected value.
9736 */
9737static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
9738{
9739 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
9740 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9741
9742 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
9743 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
9744 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
9745 kIemNativeLabelType_ReturnBreak);
9746 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9747 return off;
9748}
9749
9750
9751
9752/*********************************************************************************************************************************
9753* The native code generator functions for each MC block. *
9754*********************************************************************************************************************************/
9755
9756
9757/*
9758 * Include g_apfnIemNativeRecompileFunctions and associated functions.
9759 *
9760 * This should probably live in it's own file later, but lets see what the
9761 * compile times turn out to be first.
9762 */
9763#include "IEMNativeFunctions.cpp.h"
9764
9765
9766
9767/*********************************************************************************************************************************
9768* Recompiler Core. *
9769*********************************************************************************************************************************/
9770
9771
9772/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
9773static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
9774{
9775 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
9776 pDis->cbCachedInstr += cbMaxRead;
9777 RT_NOREF(cbMinRead);
9778 return VERR_NO_DATA;
9779}
9780
9781
9782/**
9783 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
9784 * @returns pszBuf.
9785 * @param fFlags The flags.
9786 * @param pszBuf The output buffer.
9787 * @param cbBuf The output buffer size. At least 32 bytes.
9788 */
9789DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
9790{
9791 Assert(cbBuf >= 32);
9792 static RTSTRTUPLE const s_aModes[] =
9793 {
9794 /* [00] = */ { RT_STR_TUPLE("16BIT") },
9795 /* [01] = */ { RT_STR_TUPLE("32BIT") },
9796 /* [02] = */ { RT_STR_TUPLE("!2!") },
9797 /* [03] = */ { RT_STR_TUPLE("!3!") },
9798 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
9799 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
9800 /* [06] = */ { RT_STR_TUPLE("!6!") },
9801 /* [07] = */ { RT_STR_TUPLE("!7!") },
9802 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
9803 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
9804 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
9805 /* [0b] = */ { RT_STR_TUPLE("!b!") },
9806 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
9807 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
9808 /* [0e] = */ { RT_STR_TUPLE("!e!") },
9809 /* [0f] = */ { RT_STR_TUPLE("!f!") },
9810 /* [10] = */ { RT_STR_TUPLE("!10!") },
9811 /* [11] = */ { RT_STR_TUPLE("!11!") },
9812 /* [12] = */ { RT_STR_TUPLE("!12!") },
9813 /* [13] = */ { RT_STR_TUPLE("!13!") },
9814 /* [14] = */ { RT_STR_TUPLE("!14!") },
9815 /* [15] = */ { RT_STR_TUPLE("!15!") },
9816 /* [16] = */ { RT_STR_TUPLE("!16!") },
9817 /* [17] = */ { RT_STR_TUPLE("!17!") },
9818 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
9819 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
9820 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
9821 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
9822 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
9823 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
9824 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
9825 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
9826 };
9827 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
9828 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
9829 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
9830
9831 pszBuf[off++] = ' ';
9832 pszBuf[off++] = 'C';
9833 pszBuf[off++] = 'P';
9834 pszBuf[off++] = 'L';
9835 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
9836 Assert(off < 32);
9837
9838 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
9839
9840 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
9841 {
9842 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
9843 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
9844 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
9845 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
9846 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
9847 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
9848 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
9849 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
9850 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
9851 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
9852 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
9853 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
9854 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
9855 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
9856 };
9857 if (fFlags)
9858 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
9859 if (s_aFlags[i].fFlag & fFlags)
9860 {
9861 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
9862 pszBuf[off++] = ' ';
9863 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
9864 off += s_aFlags[i].cchName;
9865 fFlags &= ~s_aFlags[i].fFlag;
9866 if (!fFlags)
9867 break;
9868 }
9869 pszBuf[off] = '\0';
9870
9871 return pszBuf;
9872}
9873
9874
9875DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9876{
9877 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9878
9879 char szDisBuf[512];
9880 DISSTATE Dis;
9881 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9882 uint32_t const cNative = pTb->Native.cInstructions;
9883 uint32_t offNative = 0;
9884#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9885 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9886#endif
9887 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9888 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9889 : DISCPUMODE_64BIT;
9890#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9891 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9892#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9893 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9894#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9895# error "Port me"
9896#else
9897 csh hDisasm = ~(size_t)0;
9898# if defined(RT_ARCH_AMD64)
9899 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9900# elif defined(RT_ARCH_ARM64)
9901 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9902# else
9903# error "Port me"
9904# endif
9905 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9906#endif
9907
9908 /*
9909 * Print TB info.
9910 */
9911 pHlp->pfnPrintf(pHlp,
9912 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9913 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9914 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9915 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9916#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9917 if (pDbgInfo && pDbgInfo->cEntries > 1)
9918 {
9919 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9920
9921 /*
9922 * This disassembly is driven by the debug info which follows the native
9923 * code and indicates when it starts with the next guest instructions,
9924 * where labels are and such things.
9925 */
9926 uint32_t idxThreadedCall = 0;
9927 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9928 uint8_t idxRange = UINT8_MAX;
9929 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9930 uint32_t offRange = 0;
9931 uint32_t offOpcodes = 0;
9932 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9933 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9934 uint32_t iDbgEntry = 1;
9935 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9936
9937 while (offNative < cNative)
9938 {
9939 /* If we're at or have passed the point where the next chunk of debug
9940 info starts, process it. */
9941 if (offDbgNativeNext <= offNative)
9942 {
9943 offDbgNativeNext = UINT32_MAX;
9944 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9945 {
9946 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9947 {
9948 case kIemTbDbgEntryType_GuestInstruction:
9949 {
9950 /* Did the exec flag change? */
9951 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9952 {
9953 pHlp->pfnPrintf(pHlp,
9954 " fExec change %#08x -> %#08x %s\n",
9955 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9956 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9957 szDisBuf, sizeof(szDisBuf)));
9958 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9959 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9960 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9961 : DISCPUMODE_64BIT;
9962 }
9963
9964 /* New opcode range? We need to fend up a spurious debug info entry here for cases
9965 where the compilation was aborted before the opcode was recorded and the actual
9966 instruction was translated to a threaded call. This may happen when we run out
9967 of ranges, or when some complicated interrupts/FFs are found to be pending or
9968 similar. So, we just deal with it here rather than in the compiler code as it
9969 is a lot simpler to do up here. */
9970 if ( idxRange == UINT8_MAX
9971 || idxRange >= cRanges
9972 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9973 {
9974 idxRange += 1;
9975 if (idxRange < cRanges)
9976 offRange = 0;
9977 else
9978 continue;
9979 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
9980 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9981 + (pTb->aRanges[idxRange].idxPhysPage == 0
9982 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9983 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9984 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9985 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9986 pTb->aRanges[idxRange].idxPhysPage);
9987 }
9988
9989 /* Disassemble the instruction. */
9990 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9991 uint32_t cbInstr = 1;
9992 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9993 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9994 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9995 if (RT_SUCCESS(rc))
9996 {
9997 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9998 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9999 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10000 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10001
10002 static unsigned const s_offMarker = 55;
10003 static char const s_szMarker[] = " ; <--- guest";
10004 if (cch < s_offMarker)
10005 {
10006 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
10007 cch = s_offMarker;
10008 }
10009 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
10010 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
10011
10012 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
10013 }
10014 else
10015 {
10016 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
10017 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
10018 cbInstr = 1;
10019 }
10020 GCPhysPc += cbInstr;
10021 offOpcodes += cbInstr;
10022 offRange += cbInstr;
10023 continue;
10024 }
10025
10026 case kIemTbDbgEntryType_ThreadedCall:
10027 pHlp->pfnPrintf(pHlp,
10028 " Call #%u to %s (%u args)%s\n",
10029 idxThreadedCall,
10030 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
10031 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
10032 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
10033 idxThreadedCall++;
10034 continue;
10035
10036 case kIemTbDbgEntryType_GuestRegShadowing:
10037 {
10038 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
10039 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
10040 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
10041 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
10042 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
10043 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
10044 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
10045 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
10046 else
10047 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
10048 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
10049 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
10050 continue;
10051 }
10052
10053 case kIemTbDbgEntryType_Label:
10054 {
10055 const char *pszName = "what_the_fudge";
10056 const char *pszComment = "";
10057 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
10058 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
10059 {
10060 case kIemNativeLabelType_Return:
10061 pszName = "Return";
10062 break;
10063 case kIemNativeLabelType_ReturnBreak:
10064 pszName = "ReturnBreak";
10065 break;
10066 case kIemNativeLabelType_ReturnWithFlags:
10067 pszName = "ReturnWithFlags";
10068 break;
10069 case kIemNativeLabelType_NonZeroRetOrPassUp:
10070 pszName = "NonZeroRetOrPassUp";
10071 break;
10072 case kIemNativeLabelType_RaiseGp0:
10073 pszName = "RaiseGp0";
10074 break;
10075 case kIemNativeLabelType_If:
10076 pszName = "If";
10077 fNumbered = true;
10078 break;
10079 case kIemNativeLabelType_Else:
10080 pszName = "Else";
10081 fNumbered = true;
10082 pszComment = " ; regs state restored pre-if-block";
10083 break;
10084 case kIemNativeLabelType_Endif:
10085 pszName = "Endif";
10086 fNumbered = true;
10087 break;
10088 case kIemNativeLabelType_CheckIrq:
10089 pszName = "CheckIrq_CheckVM";
10090 fNumbered = true;
10091 break;
10092 case kIemNativeLabelType_TlbMiss:
10093 pszName = "CheckIrq_TlbMiss";
10094 fNumbered = true;
10095 break;
10096 case kIemNativeLabelType_TlbDone:
10097 pszName = "CheckIrq_TlbDone";
10098 fNumbered = true;
10099 break;
10100 case kIemNativeLabelType_Invalid:
10101 case kIemNativeLabelType_End:
10102 break;
10103 }
10104 if (fNumbered)
10105 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
10106 else
10107 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
10108 continue;
10109 }
10110
10111 case kIemTbDbgEntryType_NativeOffset:
10112 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
10113 Assert(offDbgNativeNext > offNative);
10114 break;
10115
10116 default:
10117 AssertFailed();
10118 }
10119 iDbgEntry++;
10120 break;
10121 }
10122 }
10123
10124 /*
10125 * Disassemble the next native instruction.
10126 */
10127 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10128# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10129 uint32_t cbInstr = sizeof(paNative[0]);
10130 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10131 if (RT_SUCCESS(rc))
10132 {
10133# if defined(RT_ARCH_AMD64)
10134 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10135 {
10136 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10137 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10138 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
10139 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10140 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10141 uInfo & 0x8000 ? " - recompiled" : "");
10142 else
10143 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10144 }
10145 else
10146# endif
10147 {
10148# ifdef RT_ARCH_AMD64
10149 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10150 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10151 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10152 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10153# elif defined(RT_ARCH_ARM64)
10154 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10155 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10156 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10157# else
10158# error "Port me"
10159# endif
10160 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10161 }
10162 }
10163 else
10164 {
10165# if defined(RT_ARCH_AMD64)
10166 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10167 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10168# elif defined(RT_ARCH_ARM64)
10169 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10170# else
10171# error "Port me"
10172# endif
10173 cbInstr = sizeof(paNative[0]);
10174 }
10175 offNative += cbInstr / sizeof(paNative[0]);
10176
10177# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10178 cs_insn *pInstr;
10179 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10180 (uintptr_t)pNativeCur, 1, &pInstr);
10181 if (cInstrs > 0)
10182 {
10183 Assert(cInstrs == 1);
10184# if defined(RT_ARCH_AMD64)
10185 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10186 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10187# else
10188 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10189 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10190# endif
10191 offNative += pInstr->size / sizeof(*pNativeCur);
10192 cs_free(pInstr, cInstrs);
10193 }
10194 else
10195 {
10196# if defined(RT_ARCH_AMD64)
10197 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10198 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm)));
10199# else
10200 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10201# endif
10202 offNative++;
10203 }
10204# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10205 }
10206 }
10207 else
10208#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
10209 {
10210 /*
10211 * No debug info, just disassemble the x86 code and then the native code.
10212 *
10213 * First the guest code:
10214 */
10215 for (unsigned i = 0; i < pTb->cRanges; i++)
10216 {
10217 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
10218 + (pTb->aRanges[i].idxPhysPage == 0
10219 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
10220 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
10221 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
10222 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
10223 unsigned off = pTb->aRanges[i].offOpcodes;
10224 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
10225 while (off < cbOpcodes)
10226 {
10227 uint32_t cbInstr = 1;
10228 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
10229 &pTb->pabOpcodes[off], cbOpcodes - off,
10230 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
10231 if (RT_SUCCESS(rc))
10232 {
10233 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10234 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10235 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10236 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10237 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
10238 GCPhysPc += cbInstr;
10239 off += cbInstr;
10240 }
10241 else
10242 {
10243 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
10244 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
10245 break;
10246 }
10247 }
10248 }
10249
10250 /*
10251 * Then the native code:
10252 */
10253 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
10254 while (offNative < cNative)
10255 {
10256 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10257# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10258 uint32_t cbInstr = sizeof(paNative[0]);
10259 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10260 if (RT_SUCCESS(rc))
10261 {
10262# if defined(RT_ARCH_AMD64)
10263 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10264 {
10265 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10266 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10267 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
10268 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10269 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10270 uInfo & 0x8000 ? " - recompiled" : "");
10271 else
10272 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10273 }
10274 else
10275# endif
10276 {
10277# ifdef RT_ARCH_AMD64
10278 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10279 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10280 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10281 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10282# elif defined(RT_ARCH_ARM64)
10283 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10284 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10285 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10286# else
10287# error "Port me"
10288# endif
10289 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10290 }
10291 }
10292 else
10293 {
10294# if defined(RT_ARCH_AMD64)
10295 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10296 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10297# else
10298 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10299# endif
10300 cbInstr = sizeof(paNative[0]);
10301 }
10302 offNative += cbInstr / sizeof(paNative[0]);
10303
10304# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10305 cs_insn *pInstr;
10306 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10307 (uintptr_t)pNativeCur, 1, &pInstr);
10308 if (cInstrs > 0)
10309 {
10310 Assert(cInstrs == 1);
10311# if defined(RT_ARCH_AMD64)
10312 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10313 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10314# else
10315 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10316 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10317# endif
10318 offNative += pInstr->size / sizeof(*pNativeCur);
10319 cs_free(pInstr, cInstrs);
10320 }
10321 else
10322 {
10323# if defined(RT_ARCH_AMD64)
10324 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10325 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm)));
10326# else
10327 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10328# endif
10329 offNative++;
10330 }
10331# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10332 }
10333 }
10334
10335#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10336 /* Cleanup. */
10337 cs_close(&hDisasm);
10338#endif
10339}
10340
10341
10342/**
10343 * Recompiles the given threaded TB into a native one.
10344 *
10345 * In case of failure the translation block will be returned as-is.
10346 *
10347 * @returns pTb.
10348 * @param pVCpu The cross context virtual CPU structure of the calling
10349 * thread.
10350 * @param pTb The threaded translation to recompile to native.
10351 */
10352DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10353{
10354 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10355
10356 /*
10357 * The first time thru, we allocate the recompiler state, the other times
10358 * we just need to reset it before using it again.
10359 */
10360 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10361 if (RT_LIKELY(pReNative))
10362 iemNativeReInit(pReNative, pTb);
10363 else
10364 {
10365 pReNative = iemNativeInit(pVCpu, pTb);
10366 AssertReturn(pReNative, pTb);
10367 }
10368
10369 /*
10370 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10371 * for aborting if an error happens.
10372 */
10373 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10374#ifdef LOG_ENABLED
10375 uint32_t const cCallsOrg = cCallsLeft;
10376#endif
10377 uint32_t off = 0;
10378 int rc = VINF_SUCCESS;
10379 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10380 {
10381 /*
10382 * Emit prolog code (fixed).
10383 */
10384 off = iemNativeEmitProlog(pReNative, off);
10385
10386 /*
10387 * Convert the calls to native code.
10388 */
10389#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10390 int32_t iGstInstr = -1;
10391#endif
10392#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10393 uint32_t cThreadedCalls = 0;
10394 uint32_t cRecompiledCalls = 0;
10395#endif
10396 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10397 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10398 while (cCallsLeft-- > 0)
10399 {
10400 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10401
10402 /*
10403 * Debug info and assembly markup.
10404 */
10405 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10406 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10407#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10408 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10409 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10410 {
10411 if (iGstInstr < (int32_t)pTb->cInstructions)
10412 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10413 else
10414 Assert(iGstInstr == pTb->cInstructions);
10415 iGstInstr = pCallEntry->idxInstr;
10416 }
10417 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10418#endif
10419#if defined(VBOX_STRICT)
10420 off = iemNativeEmitMarker(pReNative, off,
10421 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
10422 pCallEntry->enmFunction));
10423#endif
10424#if defined(VBOX_STRICT)
10425 iemNativeRegAssertSanity(pReNative);
10426#endif
10427
10428 /*
10429 * Actual work.
10430 */
10431 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
10432 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "" : "(todo)"));
10433 if (pfnRecom) /** @todo stats on this. */
10434 {
10435 off = pfnRecom(pReNative, off, pCallEntry);
10436 STAM_REL_STATS({cRecompiledCalls++;});
10437 }
10438 else
10439 {
10440 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10441 STAM_REL_STATS({cThreadedCalls++;});
10442 }
10443 Assert(off <= pReNative->cInstrBufAlloc);
10444 Assert(pReNative->cCondDepth == 0);
10445
10446 /*
10447 * Advance.
10448 */
10449 pCallEntry++;
10450 }
10451
10452 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10453 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10454 if (!cThreadedCalls)
10455 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10456
10457 /*
10458 * Emit the epilog code.
10459 */
10460 uint32_t idxReturnLabel;
10461 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10462
10463 /*
10464 * Generate special jump labels.
10465 */
10466 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10467 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10468 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10469 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10470 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
10471 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
10472 }
10473 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10474 {
10475 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10476 return pTb;
10477 }
10478 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10479 Assert(off <= pReNative->cInstrBufAlloc);
10480
10481 /*
10482 * Make sure all labels has been defined.
10483 */
10484 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10485#ifdef VBOX_STRICT
10486 uint32_t const cLabels = pReNative->cLabels;
10487 for (uint32_t i = 0; i < cLabels; i++)
10488 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10489#endif
10490
10491 /*
10492 * Allocate executable memory, copy over the code we've generated.
10493 */
10494 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10495 if (pTbAllocator->pDelayedFreeHead)
10496 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10497
10498 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
10499 AssertReturn(paFinalInstrBuf, pTb);
10500 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10501
10502 /*
10503 * Apply fixups.
10504 */
10505 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10506 uint32_t const cFixups = pReNative->cFixups;
10507 for (uint32_t i = 0; i < cFixups; i++)
10508 {
10509 Assert(paFixups[i].off < off);
10510 Assert(paFixups[i].idxLabel < cLabels);
10511 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10512 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10513 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10514 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10515 switch (paFixups[i].enmType)
10516 {
10517#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10518 case kIemNativeFixupType_Rel32:
10519 Assert(paFixups[i].off + 4 <= off);
10520 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10521 continue;
10522
10523#elif defined(RT_ARCH_ARM64)
10524 case kIemNativeFixupType_RelImm26At0:
10525 {
10526 Assert(paFixups[i].off < off);
10527 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10528 Assert(offDisp >= -262144 && offDisp < 262144);
10529 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10530 continue;
10531 }
10532
10533 case kIemNativeFixupType_RelImm19At5:
10534 {
10535 Assert(paFixups[i].off < off);
10536 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10537 Assert(offDisp >= -262144 && offDisp < 262144);
10538 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10539 continue;
10540 }
10541
10542 case kIemNativeFixupType_RelImm14At5:
10543 {
10544 Assert(paFixups[i].off < off);
10545 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10546 Assert(offDisp >= -8192 && offDisp < 8192);
10547 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10548 continue;
10549 }
10550
10551#endif
10552 case kIemNativeFixupType_Invalid:
10553 case kIemNativeFixupType_End:
10554 break;
10555 }
10556 AssertFailed();
10557 }
10558
10559 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
10560 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10561
10562 /*
10563 * Convert the translation block.
10564 */
10565 RTMemFree(pTb->Thrd.paCalls);
10566 pTb->Native.paInstructions = paFinalInstrBuf;
10567 pTb->Native.cInstructions = off;
10568 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10569#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10570 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
10571 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10572#endif
10573
10574 Assert(pTbAllocator->cThreadedTbs > 0);
10575 pTbAllocator->cThreadedTbs -= 1;
10576 pTbAllocator->cNativeTbs += 1;
10577 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10578
10579#ifdef LOG_ENABLED
10580 /*
10581 * Disassemble to the log if enabled.
10582 */
10583 if (LogIs3Enabled())
10584 {
10585 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10586 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10587# ifdef DEBUG_bird
10588 RTLogFlush(NULL);
10589# endif
10590 }
10591#endif
10592
10593 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10594 return pTb;
10595}
10596
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette