VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@102676

Last change on this file since 102676 was 102663, checked in by vboxsync, 17 months ago

VMM/IEM: Working on BODY_CHECK_PC_AFTER_BRANCH and sideeffects of it. Fixed bug in 8-bit register stores (AMD64). Fixed bug in iemNativeEmitBltInCheckOpcodes (AMD64). Added a way to inject state logging between each instruction, currently only really implemented for AMD64. Relaxed the heave flushing code, no need to set the buffer pointer to NULL. Started looking at avoiding code TLB flushing when allocating memory to replace zero pages. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 532.0 KB
1/* $Id: IEMAllN8veRecompiler.cpp 102663 2023-12-21 01:55:07Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef VBOX_STRICT
132static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
133 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
134static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
135#endif
136#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
138static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
139#endif
140DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
142 IEMNATIVEGSTREG enmGstReg, uint32_t off);
143
144
145/*********************************************************************************************************************************
146* Executable Memory Allocator *
147*********************************************************************************************************************************/
148/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
149 * Use an alternative chunk sub-allocator that does not store internal data
150 * in the chunk.
151 *
152 * Using RTHeapSimple is not practical on newer darwin systems where
153 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
154 * memory. We would have to change the protection of the whole chunk for
155 * every call to RTHeapSimple, which would be rather expensive.
156 *
157 * This alternative implementation lets us restrict page protection modifications
158 * to the pages backing the executable memory we just allocated.
159 */
160#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
161/** The chunk sub-allocation unit size in bytes. */
162#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
163/** The chunk sub-allocation unit size as a shift factor. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
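/* Illustrative sanity check and worked example: the unit size and shift must stay in sync,
 * and e.g. a 300 byte request rounds up to (300 + 128 - 1) >> 7 = 3 units, i.e. 384 bytes. */
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));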
165
166#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
167# ifdef IEMNATIVE_USE_GDB_JIT
168# define IEMNATIVE_USE_GDB_JIT_ET_DYN
169
170/** GDB JIT: Code entry. */
171typedef struct GDBJITCODEENTRY
172{
173 struct GDBJITCODEENTRY *pNext;
174 struct GDBJITCODEENTRY *pPrev;
175 uint8_t *pbSymFile;
176 uint64_t cbSymFile;
177} GDBJITCODEENTRY;
178
179/** GDB JIT: Actions. */
180typedef enum GDBJITACTIONS : uint32_t
181{
182 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
183} GDBJITACTIONS;
184
185/** GDB JIT: Descriptor. */
186typedef struct GDBJITDESCRIPTOR
187{
188 uint32_t uVersion;
189 GDBJITACTIONS enmAction;
190 GDBJITCODEENTRY *pRelevant;
191 GDBJITCODEENTRY *pHead;
192 /** Our addition: */
193 GDBJITCODEENTRY *pTail;
194} GDBJITDESCRIPTOR;
195
196/** GDB JIT: Our simple symbol file data. */
197typedef struct GDBJITSYMFILE
198{
199 Elf64_Ehdr EHdr;
200# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Shdr aShdrs[5];
202# else
203 Elf64_Shdr aShdrs[7];
204 Elf64_Phdr aPhdrs[2];
205# endif
206 /** The dwarf ehframe data for the chunk. */
207 uint8_t abEhFrame[512];
208 char szzStrTab[128];
209 Elf64_Sym aSymbols[3];
210# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
211 Elf64_Sym aDynSyms[2];
212 Elf64_Dyn aDyn[6];
213# endif
214} GDBJITSYMFILE;
215
216extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
217extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
218
219/** Init once for g_IemNativeGdbJitLock. */
220static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
221/** Init once for the critical section. */
222static RTCRITSECT g_IemNativeGdbJitLock;
223
224/** GDB reads the info here. */
225GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
226
227/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
228DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
229{
230 ASMNopPause();
231}
232
233/** @callback_method_impl{FNRTONCE} */
234static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
235{
236 RT_NOREF(pvUser);
237 return RTCritSectInit(&g_IemNativeGdbJitLock);
238}
239
240
241# endif /* IEMNATIVE_USE_GDB_JIT */
242
243/**
244 * Per-chunk unwind info for non-windows hosts.
245 */
246typedef struct IEMEXECMEMCHUNKEHFRAME
247{
248# ifdef IEMNATIVE_USE_LIBUNWIND
249 /** The offset of the FDA into abEhFrame. */
250 uintptr_t offFda;
251# else
252 /** 'struct object' storage area. */
253 uint8_t abObject[1024];
254# endif
255# ifdef IEMNATIVE_USE_GDB_JIT
256# if 0
257 /** The GDB JIT 'symbol file' data. */
258 GDBJITSYMFILE GdbJitSymFile;
259# endif
260 /** The GDB JIT list entry. */
261 GDBJITCODEENTRY GdbJitEntry;
262# endif
263 /** The dwarf ehframe data for the chunk. */
264 uint8_t abEhFrame[512];
265} IEMEXECMEMCHUNKEHFRAME;
266/** Pointer to per-chunk unwind info for non-windows hosts. */
267typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
268#endif
269
270
271/**
272 * A chunk of executable memory.
273 */
274typedef struct IEMEXECMEMCHUNK
275{
276#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
277 /** Number of free items in this chunk. */
278 uint32_t cFreeUnits;
279 /** Hint where to start searching for free space in the allocation bitmap. */
280 uint32_t idxFreeHint;
281#else
282 /** The heap handle. */
283 RTHEAPSIMPLE hHeap;
284#endif
285 /** Pointer to the chunk. */
286 void *pvChunk;
287#ifdef IN_RING3
288 /**
289 * Pointer to the unwind information.
290 *
291 * This is used during C++ throw and longjmp (windows and probably most other
292 * platforms). Some debuggers (windbg) make use of it as well.
293 *
294 * Windows: This is allocated from hHeap on windows because (at least for
295 * AMD64) the UNWIND_INFO structure address in the
296 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
297 *
298 * Others: Allocated from the regular heap to avoid unnecessary executable data
299 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
300 void *pvUnwindInfo;
301#elif defined(IN_RING0)
302 /** Allocation handle. */
303 RTR0MEMOBJ hMemObj;
304#endif
305} IEMEXECMEMCHUNK;
306/** Pointer to a memory chunk. */
307typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
308
309
310/**
311 * Executable memory allocator for the native recompiler.
312 */
313typedef struct IEMEXECMEMALLOCATOR
314{
315 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
316 uint32_t uMagic;
317
318 /** The chunk size. */
319 uint32_t cbChunk;
320 /** The maximum number of chunks. */
321 uint32_t cMaxChunks;
322 /** The current number of chunks. */
323 uint32_t cChunks;
324 /** Hint where to start looking for available memory. */
325 uint32_t idxChunkHint;
326 /** Statistics: Current number of allocations. */
327 uint32_t cAllocations;
328
329 /** The total amount of memory available. */
330 uint64_t cbTotal;
331 /** Total amount of free memory. */
332 uint64_t cbFree;
333 /** Total amount of memory allocated. */
334 uint64_t cbAllocated;
335
336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
337 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
338 *
339 * Since the chunk size is a power of two and the minimum chunk size is a lot
340 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
341 * require a whole number of uint64_t elements in the allocation bitmap. So,
342 * for the sake of simplicity/laziness, they are allocated as one continuous
343 * block. */
344 uint64_t *pbmAlloc;
345 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
346 uint32_t cUnitsPerChunk;
347 /** Number of bitmap elements per chunk (for quickly locating the bitmap
348 * portion corresponding to a chunk). */
349 uint32_t cBitmapElementsPerChunk;
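    /* Illustrative sizing example (assuming the default 64 MiB chunk size): 64 MiB / 128 byte
     * units = 524288 units per chunk, which needs 524288 / 64 = 8192 uint64_t bitmap elements,
     * i.e. 64 KiB of allocation bitmap per chunk. */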
350#else
351 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
352 * @{ */
353 /** The size of the heap internal block header. This is used to adjust the
354 * request memory size to make sure there is exactly enough room for a header at
355 * the end of the blocks we allocate before the next 64 byte alignment line. */
356 uint32_t cbHeapBlockHdr;
357 /** The size of the initial heap allocation required to make sure the first
358 * allocation is correctly aligned. */
359 uint32_t cbHeapAlignTweak;
360 /** The alignment tweak allocation address. */
361 void *pvAlignTweak;
362 /** @} */
363#endif
364
365#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
366 /** Pointer to the array of unwind info running parallel to aChunks (same
367 * allocation as this structure, located after the bitmaps).
368 * (For Windows, the structures must reside in 32-bit RVA distance to the
369 * actual chunk, so they are allocated off the chunk.) */
370 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
371#endif
372
373 /** The allocation chunks. */
374 RT_FLEXIBLE_ARRAY_EXTENSION
375 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
376} IEMEXECMEMALLOCATOR;
377/** Pointer to an executable memory allocator. */
378typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
379
380/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
381#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
382
383
384static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
385
386
387/**
388 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
389 * the heap statistics.
390 */
391static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
392 uint32_t cbReq, uint32_t idxChunk)
393{
394 pExecMemAllocator->cAllocations += 1;
395 pExecMemAllocator->cbAllocated += cbReq;
396#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
397 pExecMemAllocator->cbFree -= cbReq;
398#else
399 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
400#endif
401 pExecMemAllocator->idxChunkHint = idxChunk;
402
403#ifdef RT_OS_DARWIN
404 /*
405 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
406 * on darwin. So, we mark the pages returned as read+write after alloc and
407 * expect the caller to call iemExecMemAllocatorReadyForUse when done
408 * writing to the allocation.
409 *
410 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
411 * for details.
412 */
413 /** @todo detect if this is necessary... it wasn't required on 10.15 or
414 * whatever older version it was. */
415 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
416 AssertRC(rc);
417#endif
418
419 return pvRet;
420}
421
422
423#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
424static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
425 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
426{
427 /*
428 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
429 */
430 Assert(!(cToScan & 63));
431 Assert(!(idxFirst & 63));
432 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
433 pbmAlloc += idxFirst / 64;
434
435 /*
436 * Scan the bitmap for cReqUnits consecutive clear bits
437 */
438 /** @todo This can probably be done more efficiently for non-x86 systems. */
439 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
440 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
441 {
442 uint32_t idxAddBit = 1;
443 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
444 idxAddBit++;
445 if (idxAddBit >= cReqUnits)
446 {
447 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
448
449 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
450 pChunk->cFreeUnits -= cReqUnits;
451 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
452
453 void * const pvRet = (uint8_t *)pChunk->pvChunk
454 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
455
456 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
457 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
458 }
459
460 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
461 }
462 return NULL;
463}
464#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
465
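#if 0 /* Illustrative only: a minimal standalone sketch of the first-fit scan performed by
       * iemExecMemAllocatorAllocInChunkInt above, operating on a single 64-bit bitmap word
       * instead of a whole chunk bitmap.  Not built; the name is made up for illustration. */
static int iemExecMemExampleFirstFit(uint64_t bmAlloc, unsigned cReqUnits)
{
    for (unsigned iBit = 0; iBit + cReqUnits <= 64; iBit++)
    {
        /* Count clear (free) bits starting at iBit. */
        unsigned cFree = 0;
        while (cFree < cReqUnits && !(bmAlloc & RT_BIT_64(iBit + cFree)))
            cFree++;
        if (cFree >= cReqUnits)
            return (int)iBit;   /* found a run of cReqUnits free allocation units */
        iBit += cFree;          /* skip past the allocated bit that stopped the run */
    }
    return -1;                  /* no sufficiently large free run in this word */
}
#endif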
466
467static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
468{
469#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
470 /*
471 * Figure out how much to allocate.
472 */
473 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
474 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
475 {
476 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
477 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
478 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
479 {
480 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
481 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
482 if (pvRet)
483 return pvRet;
484 }
485 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
486 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
487 cReqUnits, idxChunk);
488 }
489#else
490 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
491 if (pvRet)
492 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
493#endif
494 return NULL;
495
496}
497
498
499/**
500 * Allocates @a cbReq bytes of executable memory.
501 *
502 * @returns Pointer to the memory, NULL if out of memory or other problem
503 * encountered.
504 * @param pVCpu The cross context virtual CPU structure of the calling
505 * thread.
506 * @param cbReq How many bytes are required.
507 */
508static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
509{
510 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
511 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
512 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
513
514
515 for (unsigned iIteration = 0;; iIteration++)
516 {
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means following the logic described
521 * in iemExecMemAllocatorGrow and attempting to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * For the alternative allocator we just align it up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
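        /* Worked example (illustrative): a 200 byte request becomes 256 bytes (two 128 byte
         * units) with the alternative sub-allocator; with RTHeapSimple and a 32 byte block
         * header it becomes RT_ALIGN_32(200 + 32, 64) - 32 = 224 bytes, so the user area plus
         * the following block header ends exactly on a 64 byte boundary. */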
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /*
565 * Try pruning native TBs once.
566 */
567 if (iIteration == 0)
568 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
569 else
570 {
571 /** @todo stats... */
572 return NULL;
573 }
574 }
575
576}
577
578
579/** This is a hook that we may need later for changing memory protection back
580 * to readonly+exec */
581static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
582{
583#ifdef RT_OS_DARWIN
584 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
585 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
586 AssertRC(rc); RT_NOREF(pVCpu);
587
588 /*
589 * Flush the instruction cache:
590 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
591 */
592 /* sys_dcache_flush(pv, cb); - not necessary */
593 sys_icache_invalidate(pv, cb);
594#else
595 RT_NOREF(pVCpu, pv, cb);
596#endif
597}
598
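/* Typical usage pattern for the allocator on W^X hosts (illustrative sketch; pabCode and cbCode
 * are hypothetical names):
 *
 *      uint8_t *pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *      if (pbDst)
 *      {
 *          memcpy(pbDst, pabCode, cbCode);                       // chunk pages are RW here
 *          iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cbCode); // flip to RX + icache flush
 *      }
 */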
599
600/**
601 * Frees executable memory.
602 */
603void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
604{
605 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
606 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
607 Assert(pv);
608#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
609 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
610#else
611 Assert(!((uintptr_t)pv & 63));
612#endif
613
614 /* Align the size as we did when allocating the block. */
615#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
616 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
617#else
618 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
619#endif
620
621 /* Free it / assert sanity. */
622#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
623 uint32_t const cChunks = pExecMemAllocator->cChunks;
624 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
625 bool fFound = false;
626 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
627 {
628 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
629 fFound = offChunk < cbChunk;
630 if (fFound)
631 {
632#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
633 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
634 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
635
636 /* Check that it's valid and free it. */
637 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
638 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
639 for (uint32_t i = 1; i < cReqUnits; i++)
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
641 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
642
643 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
644 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
645
646 /* Update the stats. */
647 pExecMemAllocator->cbAllocated -= cb;
648 pExecMemAllocator->cbFree += cb;
649 pExecMemAllocator->cAllocations -= 1;
650 return;
651#else
652 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
653 break;
654#endif
655 }
656 }
657# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
658 AssertFailed();
659# else
660 Assert(fFound);
661# endif
662#endif
663
664#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
665 /* Update stats while cb is freshly calculated. */
666 pExecMemAllocator->cbAllocated -= cb;
667 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
668 pExecMemAllocator->cAllocations -= 1;
669
670 /* Free it. */
671 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
672#endif
673}
674
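/* Worked example for the bitmap book-keeping in iemExecMemAllocatorFree (illustrative): freeing
 * a 384 byte block at chunk offset 0x3000 gives idxFirst = 0x3000 >> 7 = 96 and cReqUnits =
 * 384 >> 7 = 3, so allocation bits 96..98 of that chunk's bitmap are cleared. */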
675
676
677#ifdef IN_RING3
678# ifdef RT_OS_WINDOWS
679
680/**
681 * Initializes the unwind info structures for windows hosts.
682 */
683static int
684iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
685 void *pvChunk, uint32_t idxChunk)
686{
687 RT_NOREF(pVCpu);
688
689 /*
690 * The AMD64 unwind opcodes.
691 *
692 * This is a program that starts with RSP after a RET instruction that
693 * ends up in recompiled code, and the operations we describe here will
694 * restore all non-volatile registers and bring RSP back to where our
695 * RET address is. This means it's reverse order from what happens in
696 * the prologue.
697 *
698 * Note! Using a frame register approach here both because we have one
699 * and mainly because the UWOP_ALLOC_LARGE argument values
700 * would be a pain to write initializers for. On the positive
701 * side, we're impervious to changes in the stack variable
702 * area and can deal with dynamic stack allocations if necessary.
703 */
704 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
705 {
706 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
707 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
708 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
709 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
710 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
711 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
712 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
713 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
714 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
715 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
716 };
717 union
718 {
719 IMAGE_UNWIND_INFO Info;
720 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
721 } s_UnwindInfo =
722 {
723 {
724 /* .Version = */ 1,
725 /* .Flags = */ 0,
726 /* .SizeOfProlog = */ 16, /* whatever */
727 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
728 /* .FrameRegister = */ X86_GREG_xBP,
729 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
730 }
731 };
732 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
733 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
734
735 /*
736 * Calc how much space we need and allocate it off the exec heap.
737 */
738 unsigned const cFunctionEntries = 1;
739 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
740 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
741# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
743 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
744 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
745# else
746 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
747 - pExecMemAllocator->cbHeapBlockHdr;
748 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
749 32 /*cbAlignment*/);
750# endif
751 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
752 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
753
754 /*
755 * Initialize the structures.
756 */
757 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
758
759 paFunctions[0].BeginAddress = 0;
760 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
761 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
762
763 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
764 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
765
766 /*
767 * Register it.
768 */
769 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
770 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
771
772 return VINF_SUCCESS;
773}
774
775
776# else /* !RT_OS_WINDOWS */
777
778/**
779 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
780 */
781DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
782{
783 if (iValue >= 64)
784 {
785 Assert(iValue < 0x2000);
786 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
787 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
788 }
789 else if (iValue >= 0)
790 *Ptr.pb++ = (uint8_t)iValue;
791 else if (iValue > -64)
792 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
793 else
794 {
795 Assert(iValue > -0x2000);
796 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
797 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
798 }
799 return Ptr;
800}
801
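/* Worked examples for the signed LEB128 encoder above (illustrative):
 *      iemDwarfPutLeb128(Ptr,  -8) emits the single byte 0x78 (the data alignment factor used below),
 *      iemDwarfPutLeb128(Ptr,   1) emits 0x01,
 *      iemDwarfPutLeb128(Ptr, 100) emits 0xe4 0x00 (bit 6 is set, so a second byte is needed). */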
802
803/**
804 * Emits an ULEB128 encoded value (up to 64-bit wide).
805 */
806DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
807{
808 while (uValue >= 0x80)
809 {
810 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
811 uValue >>= 7;
812 }
813 *Ptr.pb++ = (uint8_t)uValue;
814 return Ptr;
815}
816
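/* Worked example for the unsigned LEB128 encoder above (illustrative):
 *      iemDwarfPutUleb128(Ptr, 300) emits 0xac 0x02, since 300 = 0x2c + (2 << 7). */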
817
818/**
819 * Emits a CFA rule as register @a uReg + offset @a off.
820 */
821DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
822{
823 *Ptr.pb++ = DW_CFA_def_cfa;
824 Ptr = iemDwarfPutUleb128(Ptr, uReg);
825 Ptr = iemDwarfPutUleb128(Ptr, off);
826 return Ptr;
827}
828
829
830/**
831 * Emits a register (@a uReg) save location:
832 * CFA + @a off * data_alignment_factor
833 */
834DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
835{
836 if (uReg < 0x40)
837 *Ptr.pb++ = DW_CFA_offset | uReg;
838 else
839 {
840 *Ptr.pb++ = DW_CFA_offset_extended;
841 Ptr = iemDwarfPutUleb128(Ptr, uReg);
842 }
843 Ptr = iemDwarfPutUleb128(Ptr, off);
844 return Ptr;
845}
846
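/* Worked example for iemDwarfPutCfaOffset above (illustrative): with the CIE's data alignment
 * factor of -8, iemDwarfPutCfaOffset(Ptr, 6, 2) emits 0x86 0x02 (DW_CFA_offset | 6, ULEB128 2),
 * meaning register 6 was saved at CFA + 2 * -8 = CFA - 16. */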
847
848# if 0 /* unused */
849/**
850 * Emits a register (@a uReg) save location, using signed offset:
851 * CFA + @a offSigned * data_alignment_factor
852 */
853DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
854{
855 *Ptr.pb++ = DW_CFA_offset_extended_sf;
856 Ptr = iemDwarfPutUleb128(Ptr, uReg);
857 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
858 return Ptr;
859}
860# endif
861
862
863/**
864 * Initializes the unwind info section for non-windows hosts.
865 */
866static int
867iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
868 void *pvChunk, uint32_t idxChunk)
869{
870 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
871 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
872
873 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
874
875 /*
876 * Generate the CIE first.
877 */
878# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
879 uint8_t const iDwarfVer = 3;
880# else
881 uint8_t const iDwarfVer = 4;
882# endif
883 RTPTRUNION const PtrCie = Ptr;
884 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
885 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
886 *Ptr.pb++ = iDwarfVer; /* DWARF version */
887 *Ptr.pb++ = 0; /* Augmentation. */
888 if (iDwarfVer >= 4)
889 {
890 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
891 *Ptr.pb++ = 0; /* Segment selector size. */
892 }
893# ifdef RT_ARCH_AMD64
894 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
895# else
896 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
897# endif
898 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
901# elif defined(RT_ARCH_ARM64)
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
903# else
904# error "port me"
905# endif
906 /* Initial instructions: */
907# ifdef RT_ARCH_AMD64
908 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
916# elif defined(RT_ARCH_ARM64)
917# if 1
918 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
919# else
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
921# endif
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
934 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
935 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
936# else
937# error "port me"
938# endif
939 while ((Ptr.u - PtrCie.u) & 3)
940 *Ptr.pb++ = DW_CFA_nop;
941 /* Finalize the CIE size. */
942 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
943
944 /*
945 * Generate an FDE for the whole chunk area.
946 */
947# ifdef IEMNATIVE_USE_LIBUNWIND
948 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
949# endif
950 RTPTRUNION const PtrFde = Ptr;
951 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
952 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
953 Ptr.pu32++;
954 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
955 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
956# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
957 *Ptr.pb++ = DW_CFA_nop;
958# endif
959 while ((Ptr.u - PtrFde.u) & 3)
960 *Ptr.pb++ = DW_CFA_nop;
961 /* Finalize the FDE size. */
962 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
963
964 /* Terminator entry. */
965 *Ptr.pu32++ = 0;
966 *Ptr.pu32++ = 0; /* just to be sure... */
967 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
968
969 /*
970 * Register it.
971 */
972# ifdef IEMNATIVE_USE_LIBUNWIND
973 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
974# else
975 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
976 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
977# endif
978
979# ifdef IEMNATIVE_USE_GDB_JIT
980 /*
981 * Now for telling GDB about this (experimental).
982 *
983 * This seems to work best with ET_DYN.
984 */
985 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
986# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
987 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
989# else
990 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
991 - pExecMemAllocator->cbHeapBlockHdr;
992 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
993# endif
994 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
995 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
996
997 RT_ZERO(*pSymFile);
998
999 /*
1000 * The ELF header:
1001 */
1002 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1003 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1004 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1005 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1006 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1007 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1008 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1009 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1010# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1011 pSymFile->EHdr.e_type = ET_DYN;
1012# else
1013 pSymFile->EHdr.e_type = ET_REL;
1014# endif
1015# ifdef RT_ARCH_AMD64
1016 pSymFile->EHdr.e_machine = EM_AMD64;
1017# elif defined(RT_ARCH_ARM64)
1018 pSymFile->EHdr.e_machine = EM_AARCH64;
1019# else
1020# error "port me"
1021# endif
1022 pSymFile->EHdr.e_version = 1; /*?*/
1023 pSymFile->EHdr.e_entry = 0;
1024# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1025 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phoff = 0;
1028# endif
1029 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1030 pSymFile->EHdr.e_flags = 0;
1031 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1032# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1033 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1034 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1035# else
1036 pSymFile->EHdr.e_phentsize = 0;
1037 pSymFile->EHdr.e_phnum = 0;
1038# endif
1039 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1040 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1041 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1042
1043 uint32_t offStrTab = 0;
1044#define APPEND_STR(a_szStr) do { \
1045 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1046 offStrTab += sizeof(a_szStr); \
1047 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1048 } while (0)
1049#define APPEND_STR_FMT(a_szStr, ...) do { \
1050 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1051 offStrTab++; \
1052 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1053 } while (0)
1054
1055 /*
1056 * Section headers.
1057 */
1058 /* Section header #0: NULL */
1059 unsigned i = 0;
1060 APPEND_STR("");
1061 RT_ZERO(pSymFile->aShdrs[i]);
1062 i++;
1063
1064 /* Section header: .eh_frame */
1065 pSymFile->aShdrs[i].sh_name = offStrTab;
1066 APPEND_STR(".eh_frame");
1067 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1068 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1069# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1070 pSymFile->aShdrs[i].sh_offset
1071 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1072# else
1073 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1074 pSymFile->aShdrs[i].sh_offset = 0;
1075# endif
1076
1077 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1078 pSymFile->aShdrs[i].sh_link = 0;
1079 pSymFile->aShdrs[i].sh_info = 0;
1080 pSymFile->aShdrs[i].sh_addralign = 1;
1081 pSymFile->aShdrs[i].sh_entsize = 0;
1082 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1083 i++;
1084
1085 /* Section header: .shstrtab */
1086 unsigned const iShStrTab = i;
1087 pSymFile->EHdr.e_shstrndx = iShStrTab;
1088 pSymFile->aShdrs[i].sh_name = offStrTab;
1089 APPEND_STR(".shstrtab");
1090 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1091 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1092# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1093 pSymFile->aShdrs[i].sh_offset
1094 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1095# else
1096 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1097 pSymFile->aShdrs[i].sh_offset = 0;
1098# endif
1099 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1100 pSymFile->aShdrs[i].sh_link = 0;
1101 pSymFile->aShdrs[i].sh_info = 0;
1102 pSymFile->aShdrs[i].sh_addralign = 1;
1103 pSymFile->aShdrs[i].sh_entsize = 0;
1104 i++;
1105
1106 /* Section header: .symtab */
1107 pSymFile->aShdrs[i].sh_name = offStrTab;
1108 APPEND_STR(".symtab");
1109 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1110 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1111 pSymFile->aShdrs[i].sh_offset
1112 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1113 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1114 pSymFile->aShdrs[i].sh_link = iShStrTab;
1115 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1117 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1118 i++;
1119
1120# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1121 /* Section header: .dynsym */
1122 pSymFile->aShdrs[i].sh_name = offStrTab;
1123 APPEND_STR(".dynsym");
1124 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1125 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1126 pSymFile->aShdrs[i].sh_offset
1127 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1128 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1129 pSymFile->aShdrs[i].sh_link = iShStrTab;
1130 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1132 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1133 i++;
1134# endif
1135
1136# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1137 /* Section header: .dynamic */
1138 pSymFile->aShdrs[i].sh_name = offStrTab;
1139 APPEND_STR(".dynamic");
1140 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1141 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1142 pSymFile->aShdrs[i].sh_offset
1143 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1144 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1145 pSymFile->aShdrs[i].sh_link = iShStrTab;
1146 pSymFile->aShdrs[i].sh_info = 0;
1147 pSymFile->aShdrs[i].sh_addralign = 1;
1148 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1149 i++;
1150# endif
1151
1152 /* Section header: .text */
1153 unsigned const iShText = i;
1154 pSymFile->aShdrs[i].sh_name = offStrTab;
1155 APPEND_STR(".text");
1156 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1157 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1158# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1159 pSymFile->aShdrs[i].sh_offset
1160 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1161# else
1162 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1163 pSymFile->aShdrs[i].sh_offset = 0;
1164# endif
1165 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1166 pSymFile->aShdrs[i].sh_link = 0;
1167 pSymFile->aShdrs[i].sh_info = 0;
1168 pSymFile->aShdrs[i].sh_addralign = 1;
1169 pSymFile->aShdrs[i].sh_entsize = 0;
1170 i++;
1171
1172 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1173
1174# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1175 /*
1176 * The program headers:
1177 */
1178 /* Everything in a single LOAD segment: */
1179 i = 0;
1180 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1181 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1182 pSymFile->aPhdrs[i].p_offset
1183 = pSymFile->aPhdrs[i].p_vaddr
1184 = pSymFile->aPhdrs[i].p_paddr = 0;
1185 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1186 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1187 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1188 i++;
1189 /* The .dynamic segment. */
1190 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1191 pSymFile->aPhdrs[i].p_flags = PF_R;
1192 pSymFile->aPhdrs[i].p_offset
1193 = pSymFile->aPhdrs[i].p_vaddr
1194 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1195 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1196 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1197 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1198 i++;
1199
1200 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1201
1202 /*
1203 * The dynamic section:
1204 */
1205 i = 0;
1206 pSymFile->aDyn[i].d_tag = DT_SONAME;
1207 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1208 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1211 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1214 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_NULL;
1223 i++;
1224 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1225# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1226
1227 /*
1228 * Symbol tables:
1229 */
1230 /** @todo gdb doesn't seem to really like this ... */
1231 i = 0;
1232 pSymFile->aSymbols[i].st_name = 0;
1233 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1234 pSymFile->aSymbols[i].st_value = 0;
1235 pSymFile->aSymbols[i].st_size = 0;
1236 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1237 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1238# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1239 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1240# endif
1241 i++;
1242
1243 pSymFile->aSymbols[i].st_name = 0;
1244 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1245 pSymFile->aSymbols[i].st_value = 0;
1246 pSymFile->aSymbols[i].st_size = 0;
1247 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1248 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1249 i++;
1250
1251 pSymFile->aSymbols[i].st_name = offStrTab;
1252 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1253# if 0
1254 pSymFile->aSymbols[i].st_shndx = iShText;
1255 pSymFile->aSymbols[i].st_value = 0;
1256# else
1257 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1258 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1259# endif
1260 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1261 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1262 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1263# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1264 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1265 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1266# endif
1267 i++;
1268
1269 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1270 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1271
1272 /*
1273 * The GDB JIT entry and informing GDB.
1274 */
1275 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1276# if 1
1277 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1278# else
1279 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1280# endif
1281
1282 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1283 RTCritSectEnter(&g_IemNativeGdbJitLock);
1284 pEhFrame->GdbJitEntry.pNext = NULL;
1285 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1286 if (__jit_debug_descriptor.pTail)
1287 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1288 else
1289 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1290 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1291 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1292
1293 /* Notify GDB: */
1294 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1295 __jit_debug_register_code();
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1297 RTCritSectLeave(&g_IemNativeGdbJitLock);
1298
1299# else /* !IEMNATIVE_USE_GDB_JIT */
1300 RT_NOREF(pVCpu);
1301# endif /* !IEMNATIVE_USE_GDB_JIT */
1302
1303 return VINF_SUCCESS;
1304}
1305
1306# endif /* !RT_OS_WINDOWS */
1307#endif /* IN_RING3 */
1308
1309
1310/**
1311 * Adds another chunk to the executable memory allocator.
1312 *
1313 * This is used by the init code for the initial allocation and later by the
1314 * regular allocator function when it's out of memory.
1315 */
1316static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1317{
1318 /* Check that we've got room for growth. */
1319 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1320 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1321
1322 /* Allocate a chunk. */
1323#ifdef RT_OS_DARWIN
1324 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1325#else
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1327#endif
1328 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1329
1330#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1331 int rc = VINF_SUCCESS;
1332#else
1333 /* Initialize the heap for the chunk. */
1334 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1335 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1336 AssertRC(rc);
1337 if (RT_SUCCESS(rc))
1338 {
1339 /*
1340 * We want the memory to be aligned on 64 byte, so the first time thru
1341 * here we do some exploratory allocations to see how we can achieve this.
1342 * On subsequent runs we only make an initial adjustment allocation, if
1343 * necessary.
1344 *
1345 * Since we own the heap implementation, we know that the internal block
1346 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1347 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1348 * to the size, align up by 64 bytes, and subtract 32 bytes.
1349 *
1350 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1351 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1352 * allocation to force subsequent allocations to return 64 byte aligned
1353 * user areas.
1354 */
1355 if (!pExecMemAllocator->cbHeapBlockHdr)
1356 {
1357 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1358 pExecMemAllocator->cbHeapAlignTweak = 64;
1359 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1360 32 /*cbAlignment*/);
1361 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1362
1363 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1364 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1365 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1366 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1367 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1368
1369 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 RTHeapSimpleFree(hHeap, pvTest2);
1376 RTHeapSimpleFree(hHeap, pvTest1);
1377 }
1378 else
1379 {
1380 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1381 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1382 }
1383 if (RT_SUCCESS(rc))
1384#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1385 {
1386 /*
1387 * Add the chunk.
1388 *
1389 * This must be done before the unwind init so windows can allocate
1390 * memory from the chunk when using the alternative sub-allocator.
1391 */
1392 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1393#ifdef IN_RING3
1394 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1395#endif
1396#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1397 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1398#else
1399 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1400 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1401 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1402 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1403#endif
1404
1405 pExecMemAllocator->cChunks = idxChunk + 1;
1406 pExecMemAllocator->idxChunkHint = idxChunk;
1407
1408#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1409 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1410 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1411#else
1412 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1413 pExecMemAllocator->cbTotal += cbFree;
1414 pExecMemAllocator->cbFree += cbFree;
1415#endif
1416
1417#ifdef IN_RING3
1418 /*
1419 * Initialize the unwind information (this cannot really fail atm).
1420 * (This sets pvUnwindInfo.)
1421 */
1422 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1423 if (RT_SUCCESS(rc))
1424#endif
1425 {
1426 return VINF_SUCCESS;
1427 }
1428
1429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1430 /* Just in case the impossible happens, undo the above: */
1431 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1432 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1433 pExecMemAllocator->cChunks = idxChunk;
1434 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1435 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1436 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1437 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1438#endif
1439 }
1440#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1441 }
1442#endif
1443 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1444 RT_NOREF(pVCpu);
1445 return rc;
1446}
1447
1448
1449/**
1450 * Initializes the executable memory allocator for native recompilation on the
1451 * calling EMT.
1452 *
1453 * @returns VBox status code.
1454 * @param pVCpu The cross context virtual CPU structure of the calling
1455 * thread.
1456 * @param cbMax The max size of the allocator.
1457 * @param cbInitial The initial allocator size.
1458 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1459 * dependent).
1460 */
1461int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1462{
1463 /*
1464 * Validate input.
1465 */
1466 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1467 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1468 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1469 || cbChunk == 0
1470 || ( RT_IS_POWER_OF_TWO(cbChunk)
1471 && cbChunk >= _1M
1472 && cbChunk <= _256M
1473 && cbChunk <= cbMax),
1474 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1475 VERR_OUT_OF_RANGE);
1476
1477 /*
1478 * Adjust/figure out the chunk size.
1479 */
1480 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1481 {
1482 if (cbMax >= _256M)
1483 cbChunk = _64M;
1484 else
1485 {
1486 if (cbMax < _16M)
1487 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1488 else
1489 cbChunk = (uint32_t)cbMax / 4;
1490 if (!RT_IS_POWER_OF_TWO(cbChunk))
1491 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1492 }
1493 }
1494
1495 if (cbChunk > cbMax)
1496 cbMax = cbChunk;
1497 else
1498 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1499 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1500 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
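 /* Example (illustrative): cbMax = 128 MiB with cbChunk left at the default
    gives cbChunk = 128 MiB / 4 = 32 MiB (already a power of two), cbMax stays
    128 MiB and cMaxChunks = 4. */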
1501
1502 /*
1503 * Allocate and initialize the allocator instance.
1504 */
1505 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1506#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1507 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1508 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1509 cbNeeded += cbBitmap * cMaxChunks;
1510 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1511 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1512#endif
1513#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1514 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1515 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1516#endif
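 /* Note: Everything is carved out of the single RTMemAllocZ call below: the
    allocator instance (incl. aChunks[cMaxChunks]), then - with the alternative
    sub-allocator - one allocation bitmap per chunk (one bit per unit of
    2^IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT bytes, stored as 64-bit elements),
    and finally - in ring-3 on non-Windows hosts - one IEMEXECMEMCHUNKEHFRAME
    per chunk for the unwind info. */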
1517 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1518 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1519 VERR_NO_MEMORY);
1520 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1521 pExecMemAllocator->cbChunk = cbChunk;
1522 pExecMemAllocator->cMaxChunks = cMaxChunks;
1523 pExecMemAllocator->cChunks = 0;
1524 pExecMemAllocator->idxChunkHint = 0;
1525 pExecMemAllocator->cAllocations = 0;
1526 pExecMemAllocator->cbTotal = 0;
1527 pExecMemAllocator->cbFree = 0;
1528 pExecMemAllocator->cbAllocated = 0;
1529#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1530 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1531 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1532 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1533 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1534#endif
1535#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1536 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1537#endif
1538 for (uint32_t i = 0; i < cMaxChunks; i++)
1539 {
1540#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1541 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1542 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1543#else
1544 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1545#endif
1546 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1547#ifdef IN_RING0
1548 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1549#else
1550 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1551#endif
1552 }
1553 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1554
1555 /*
1556 * Do the initial allocations.
1557 */
1558 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1559 {
1560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1561 AssertLogRelRCReturn(rc, rc);
1562 }
1563
1564 pExecMemAllocator->idxChunkHint = 0;
1565
1566 return VINF_SUCCESS;
1567}
1568
1569
1570/*********************************************************************************************************************************
1571* Native Recompilation *
1572*********************************************************************************************************************************/
1573
1574
1575/**
1576 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1577 */
1578IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1579{
1580 pVCpu->iem.s.cInstructions += idxInstr;
1581 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1582}
1583
1584
1585/**
1586 * Used by TB code when it wants to raise a \#GP(0).
1587 */
1588IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1589{
1590 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1591#ifndef _MSC_VER
1592 return VINF_IEM_RAISED_XCPT; /* not reached */
1593#endif
1594}
1595
1596
1597/**
1598 * Used by TB code when detecting opcode changes.
1599 * @see iemThreadeFuncWorkerObsoleteTb
1600 */
1601IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1602{
1603 /* We set fSafeToFree to false because we're being called in the context
1604 of a TB callback function, which for native TBs means we cannot release
1605 the executable memory until we've returned our way back to iemTbExec, as
1606 that return path goes via the native code generated for the TB. */
1607 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1608 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1609 return VINF_IEM_REEXEC_BREAK;
1610}
1611
1612
1613/**
1614 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1615 */
1616IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1617{
1618 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1619 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1620 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1621 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1622 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1623 return VINF_IEM_REEXEC_BREAK;
1624}
1625
1626
1627/**
1628 * Used by TB code when we missed a PC check after a branch.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1631{
1632 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1633 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1634 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1635 pVCpu->iem.s.pbInstrBuf));
1636 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1637 return VINF_IEM_REEXEC_BREAK;
1638}
1639
1640
1641
1642/*********************************************************************************************************************************
1643* Helpers: Segmented memory fetches and stores. *
1644*********************************************************************************************************************************/
1645
1646/**
1647 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1648 */
1649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1650{
1651 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1652}
1653
1654
1655/**
1656 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1657 * to 16 bits.
1658 */
1659IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1660{
1661 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1662}
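/* Note: The cast chains in these sign-extending (_Sx_) helpers first sign-extend
   to the destination width and then zero-extend the result to 64 bits, e.g. an
   input byte of 0x80 yields 0xff80 here and 0x000000000000ff80 as the return
   value, so the upper bits are well defined for the generated native code. */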
1663
1664
1665/**
1666 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1667 * to 32 bits.
1668 */
1669IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1670{
1671 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1672}
1673
1674/**
1675 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1676 * to 64 bits.
1677 */
1678IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1679{
1680 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1681}
1682
1683
1684/**
1685 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1686 */
1687IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1688{
1689 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1690}
1691
1692
1693/**
1694 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1695 * to 32 bits.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1698{
1699 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1700}
1701
1702
1703/**
1704 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1705 * to 64 bits.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1708{
1709 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1710}
1711
1712
1713/**
1714 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1715 */
1716IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1717{
1718 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1719}
1720
1721
1722/**
1723 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1724 * to 64 bits.
1725 */
1726IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1727{
1728 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1729}
1730
1731
1732/**
1733 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1736{
1737 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1738}
1739
1740
1741/**
1742 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1745{
1746 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1747}
1748
1749
1750/**
1751 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1754{
1755 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1756}
1757
1758
1759/**
1760 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1763{
1764 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1765}
1766
1767
1768/**
1769 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1772{
1773 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1774}
1775
1776
1777
1778/**
1779 * Used by TB code to push unsigned 16-bit value onto a generic stack.
1780 */
1781IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1782{
1783 iemMemStackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemStackPushU16SafeJmp */
1784}
1785
1786
1787/**
1788 * Used by TB code to push unsigned 32-bit value onto a generic stack.
1789 */
1790IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
1791{
1792 iemMemStackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SafeJmp */
1793}
1794
1795
1796/**
1797 * Used by TB code to push 32-bit selector value onto a generic stack.
1798 *
1799 * Intel CPUs doesn't do write a whole dword, thus the special function.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
1802{
1803 iemMemStackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SRegSafeJmp */
1804}
1805
1806
1807/**
1808 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1809 */
1810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
1811{
1812 iemMemStackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemStackPushU64SafeJmp */
1813}
1814
1815
1816/**
1817 * Used by TB code to pop a 16-bit general purpose register off a generic stack.
1818 */
1819IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
1820{
1821 iemMemStackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU16SafeJmp */
1822}
1823
1824
1825/**
1826 * Used by TB code to pop a 32-bit general purpose register off a generic stack.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
1829{
1830 iemMemStackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU32SafeJmp */
1831}
1832
1833
1834/**
1835 * Used by TB code to pop a 64-bit general purpose register off a generic stack.
1836 */
1837IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
1838{
1839 iemMemStackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU64SafeJmp */
1840}
1841
1842
1843
1844/*********************************************************************************************************************************
1845* Helpers: Flat memory fetches and stores. *
1846*********************************************************************************************************************************/
1847
1848/**
1849 * Used by TB code to load unsigned 8-bit data w/ flat address.
1850 * @note Zero extending the value to 64-bit to simplify assembly.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1853{
1854 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1855}
1856
1857
1858/**
1859 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1860 * to 16 bits.
1861 * @note Zero extending the value to 64-bit to simplify assembly.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1864{
1865 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1866}
1867
1868
1869/**
1870 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1871 * to 32 bits.
1872 * @note Zero extending the value to 64-bit to simplify assembly.
1873 */
1874IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1875{
1876 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1877}
1878
1879
1880/**
1881 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1882 * to 64 bits.
1883 */
1884IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1885{
1886 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
1887}
1888
1889
1890/**
1891 * Used by TB code to load unsigned 16-bit data w/ flat address.
1892 * @note Zero extending the value to 64-bit to simplify assembly.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1895{
1896 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1897}
1898
1899
1900/**
1901 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1902 * to 32 bits.
1903 * @note Zero extending the value to 64-bit to simplify assembly.
1904 */
1905IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1906{
1907 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1908}
1909
1910
1911/**
1912 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1913 * to 64 bits.
1914 * @note Zero extending the value to 64-bit to simplify assembly.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1917{
1918 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
1919}
1920
1921
1922/**
1923 * Used by TB code to load unsigned 32-bit data w/ flat address.
1924 * @note Zero extending the value to 64-bit to simplify assembly.
1925 */
1926IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1927{
1928 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1929}
1930
1931
1932/**
1933 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1934 * to 64 bits.
1935 * @note Zero extending the value to 64-bit to simplify assembly.
1936 */
1937IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1938{
1939 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
1940}
1941
1942
1943/**
1944 * Used by TB code to load unsigned 64-bit data w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1947{
1948 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU64SafeJmp */
1949}
1950
1951
1952/**
1953 * Used by TB code to store unsigned 8-bit data w/ flat address.
1954 */
1955IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1956{
1957 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1958}
1959
1960
1961/**
1962 * Used by TB code to store unsigned 16-bit data w/ flat address.
1963 */
1964IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1965{
1966 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 32-bit data w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1974{
1975 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1976}
1977
1978
1979/**
1980 * Used by TB code to store unsigned 64-bit data w/ flat address.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1983{
1984 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1985}
1986
1987
1988
1989/**
1990 * Used by TB code to push unsigned 16-bit value onto a flat 32-bit stack.
1991 */
1992IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
1993{
1994 iemMemFlat32StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat32StackPushU16SafeJmp */
1995}
1996
1997
1998/**
1999 * Used by TB code to push unsigned 32-bit value onto a flat 32-bit stack.
2000 */
2001IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
2002{
2003 iemMemFlat32StackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SafeJmp */
2004}
2005
2006
2007/**
2008 * Used by TB code to push segment selector value onto a flat 32-bit stack.
2009 *
2010 * Intel CPUs don't write a whole dword here, hence the special function.
2011 */
2012IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
2013{
2014 iemMemFlat32StackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SRegSafeJmp */
2015}
2016
2017
2018/**
2019 * Used by TB code to pop a 16-bit general purpose register off a flat 32-bit stack.
2020 */
2021IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2022{
2023 iemMemFlat32StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU16SafeJmp */
2024}
2025
2026
2027/**
2028 * Used by TB code to pop a 32-bit general purpose register off a flat 32-bit stack.
2029 */
2030IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
2031{
2032 iemMemFlat32StackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU32SafeJmp */
2033}
2034
2035
2036
2037/**
2038 * Used by TB code to push unsigned 16-bit value onto a flat 64-bit stack.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
2041{
2042 iemMemFlat64StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat64StackPushU16SafeJmp */
2043}
2044
2045
2046/**
2047 * Used by TB code to push unsigned 64-bit value onto a flat 64-bit stack.
2048 */
2049IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
2050{
2051 iemMemFlat64StackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemFlat64StackPushU64SafeJmp */
2052}
2053
2054
2055/**
2056 * Used by TB code to pop a 16-bit general purpose register off a flat 64-bit stack.
2057 */
2058IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
2059{
2060 iemMemFlat64StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU16SafeJmp */
2061}
2062
2063
2064/**
2065 * Used by TB code to pop a 64-bit general purpose register off a flat 64-bit stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
2068{
2069 iemMemFlat64StackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU64SafeJmp */
2070}
2071
2072
2073
2074/*********************************************************************************************************************************
2075* Helpers: Segmented memory mapping. *
2076*********************************************************************************************************************************/
2077
2078/**
2079 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2082 RTGCPTR GCPtrMem, uint8_t iSegReg))
2083{
2084 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RwSafeJmp */
2085}
2086
2087
2088/**
2089 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2092 RTGCPTR GCPtrMem, uint8_t iSegReg))
2093{
2094 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8WoSafeJmp */
2095}
2096
2097
2098/**
2099 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2100 */
2101IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2102 RTGCPTR GCPtrMem, uint8_t iSegReg))
2103{
2104 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RoSafeJmp */
2105}
2106
2107
2108/**
2109 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2110 */
2111IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2112 RTGCPTR GCPtrMem, uint8_t iSegReg))
2113{
2114 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RwSafeJmp */
2115}
2116
2117
2118/**
2119 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2122 RTGCPTR GCPtrMem, uint8_t iSegReg))
2123{
2124 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16WoSafeJmp */
2125}
2126
2127
2128/**
2129 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2130 */
2131IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2132 RTGCPTR GCPtrMem, uint8_t iSegReg))
2133{
2134 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RoSafeJmp */
2135}
2136
2137
2138/**
2139 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2140 */
2141IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2142 RTGCPTR GCPtrMem, uint8_t iSegReg))
2143{
2144 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RwSafeJmp */
2145}
2146
2147
2148/**
2149 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2150 */
2151IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2152 RTGCPTR GCPtrMem, uint8_t iSegReg))
2153{
2154 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32WoSafeJmp */
2155}
2156
2157
2158/**
2159 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2160 */
2161IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2162 RTGCPTR GCPtrMem, uint8_t iSegReg))
2163{
2164 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RoSafeJmp */
2165}
2166
2167
2168/**
2169 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2172 RTGCPTR GCPtrMem, uint8_t iSegReg))
2173{
2174 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RwSafeJmp */
2175}
2176
2177
2178/**
2179 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2182 RTGCPTR GCPtrMem, uint8_t iSegReg))
2183{
2184 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64WoSafeJmp */
2185}
2186
2187
2188/**
2189 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2190 */
2191IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2192 RTGCPTR GCPtrMem, uint8_t iSegReg))
2193{
2194 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RoSafeJmp */
2195}
2196
2197
2198/**
2199 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2202 RTGCPTR GCPtrMem, uint8_t iSegReg))
2203{
2204 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataR80WoSafeJmp */
2205}
2206
2207
2208/**
2209 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2210 */
2211IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2212 RTGCPTR GCPtrMem, uint8_t iSegReg))
2213{
2214 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataD80WoSafeJmp */
2215}
2216
2217
2218/**
2219 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2220 */
2221IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2222 RTGCPTR GCPtrMem, uint8_t iSegReg))
2223{
2224 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RwSafeJmp */
2225}
2226
2227
2228/**
2229 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2230 */
2231IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2232 RTGCPTR GCPtrMem, uint8_t iSegReg))
2233{
2234 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128WoSafeJmp */
2235}
2236
2237
2238/**
2239 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2240 */
2241IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2242 RTGCPTR GCPtrMem, uint8_t iSegReg))
2243{
2244 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RoSafeJmp */
2245}
2246
2247
2248/*********************************************************************************************************************************
2249* Helpers: Flat memory mapping. *
2250*********************************************************************************************************************************/
2251
2252/**
2253 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2254 */
2255IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2256{
2257 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RwSafeJmp */
2258}
2259
2260
2261/**
2262 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2263 */
2264IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2265{
2266 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8WoSafeJmp */
2267}
2268
2269
2270/**
2271 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2272 */
2273IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2274{
2275 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RoSafeJmp */
2276}
2277
2278
2279/**
2280 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2281 */
2282IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2283{
2284 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RwSafeJmp */
2285}
2286
2287
2288/**
2289 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2290 */
2291IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2292{
2293 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16WoSafeJmp */
2294}
2295
2296
2297/**
2298 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2299 */
2300IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2301{
2302 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RoSafeJmp */
2303}
2304
2305
2306/**
2307 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2308 */
2309IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2310{
2311 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RwSafeJmp */
2312}
2313
2314
2315/**
2316 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2317 */
2318IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2319{
2320 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32WoSafeJmp */
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2328{
2329 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RoSafeJmp */
2330}
2331
2332
2333/**
2334 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2337{
2338 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RwSafeJmp */
2339}
2340
2341
2342/**
2343 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2344 */
2345IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2346{
2347 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64WoSafeJmp */
2348}
2349
2350
2351/**
2352 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2353 */
2354IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2355{
2356 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RoSafeJmp */
2357}
2358
2359
2360/**
2361 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2362 */
2363IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2364{
2365 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataR80WoSafeJmp */
2366}
2367
2368
2369/**
2370 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2373{
2374 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataD80WoSafeJmp */
2375}
2376
2377
2378/**
2379 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2380 */
2381IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2382{
2383 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RwSafeJmp */
2384}
2385
2386
2387/**
2388 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2389 */
2390IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2391{
2392 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128WoSafeJmp */
2393}
2394
2395
2396/**
2397 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2400{
2401 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RoSafeJmp */
2402}
2403
2404
2405/*********************************************************************************************************************************
2406* Helpers: Commit, rollback & unmap *
2407*********************************************************************************************************************************/
2408
2409/**
2410 * Used by TB code to commit and unmap a read-write memory mapping.
2411 */
2412IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2413{
2414 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2415}
2416
2417
2418/**
2419 * Used by TB code to commit and unmap a write-only memory mapping.
2420 */
2421IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2422{
2423 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2424}
2425
2426
2427/**
2428 * Used by TB code to commit and unmap a read-only memory mapping.
2429 */
2430IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2431{
2432 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2433}
2434
2435
2436/**
2437 * Reinitializes the native recompiler state.
2438 *
2439 * Called before starting a new recompile job.
2440 */
2441static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2442{
2443 pReNative->cLabels = 0;
2444 pReNative->bmLabelTypes = 0;
2445 pReNative->cFixups = 0;
2446#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2447 pReNative->pDbgInfo->cEntries = 0;
2448#endif
2449 pReNative->pTbOrg = pTb;
2450 pReNative->cCondDepth = 0;
2451 pReNative->uCondSeqNo = 0;
2452 pReNative->uCheckIrqSeqNo = 0;
2453 pReNative->uTlbSeqNo = 0;
2454
2455 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2456#if IEMNATIVE_HST_GREG_COUNT < 32
2457 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2458#endif
2459 ;
2460 pReNative->Core.bmHstRegsWithGstShadow = 0;
2461 pReNative->Core.bmGstRegShadows = 0;
2462 pReNative->Core.bmVars = 0;
2463 pReNative->Core.bmStack = 0;
2464 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2465 pReNative->Core.u64ArgVars = UINT64_MAX;
2466
2467 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2468 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2469 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2470 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2471 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2472 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2473 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2474 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2475 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2476 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2477
2478 /* Full host register reinit: */
2479 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2480 {
2481 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2482 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2483 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2484 }
2485
2486 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2487 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2488#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2489 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2490#endif
2491#ifdef IEMNATIVE_REG_FIXED_TMP0
2492 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2493#endif
2494 );
2495 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2496 {
2497 fRegs &= ~RT_BIT_32(idxReg);
2498 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2499 }
2500
2501 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2502#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2503 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2504#endif
2505#ifdef IEMNATIVE_REG_FIXED_TMP0
2506 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2507#endif
2508 return pReNative;
2509}
2510
2511
2512/**
2513 * Allocates and initializes the native recompiler state.
2514 *
2515 * This is called the first time an EMT wants to recompile something.
2516 *
2517 * @returns Pointer to the new recompiler state.
2518 * @param pVCpu The cross context virtual CPU structure of the calling
2519 * thread.
2520 * @param pTb The TB that's about to be recompiled.
2521 * @thread EMT(pVCpu)
2522 */
2523static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2524{
2525 VMCPU_ASSERT_EMT(pVCpu);
2526
2527 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2528 AssertReturn(pReNative, NULL);
2529
2530 /*
2531 * Try allocate all the buffers and stuff we need.
2532 */
2533 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2534 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2535 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2536#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2537 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2538#endif
2539 if (RT_LIKELY( pReNative->pInstrBuf
2540 && pReNative->paLabels
2541 && pReNative->paFixups)
2542#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2543 && pReNative->pDbgInfo
2544#endif
2545 )
2546 {
2547 /*
2548 * Set the buffer & array sizes on success.
2549 */
2550 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2551 pReNative->cLabelsAlloc = _8K;
2552 pReNative->cFixupsAlloc = _16K;
2553#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2554 pReNative->cDbgInfoAlloc = _16K;
2555#endif
2556
2557 /*
2558 * Done, just need to save it and reinit it.
2559 */
2560 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2561 return iemNativeReInit(pReNative, pTb);
2562 }
2563
2564 /*
2565 * Failed. Cleanup and return.
2566 */
2567 AssertFailed();
2568 RTMemFree(pReNative->pInstrBuf);
2569 RTMemFree(pReNative->paLabels);
2570 RTMemFree(pReNative->paFixups);
2571#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2572 RTMemFree(pReNative->pDbgInfo);
2573#endif
2574 RTMemFree(pReNative);
2575 return NULL;
2576}
2577
2578
2579/**
2580 * Creates a label.
2581 *
2582 * If the label does not yet have a defined position,
2583 * call iemNativeLabelDefine() later to set it.
2584 *
2585 * @returns Label ID. Throws VBox status code on failure, so no need to check
2586 * the return value.
2587 * @param pReNative The native recompile state.
2588 * @param enmType The label type.
2589 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2590 * label is not yet defined (default).
2591 * @param uData Data associated with the label. Only applicable to
2592 * certain types of labels. Default is zero.
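 *
 * A rough usage sketch (illustrative only; the label type is just an example
 * and the offsets depend on the code being emitted):
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *      // ... emit code that references idxLabel via iemNativeAddFixup ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off); // once the final position is known
 * @endcode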
2593 */
2594DECL_HIDDEN_THROW(uint32_t)
2595iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2596 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2597{
2598 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2599
2600 /*
2601 * Locate existing label definition.
2602 *
2603 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2604 * and uData is zero.
2605 */
2606 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2607 uint32_t const cLabels = pReNative->cLabels;
2608 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2609#ifndef VBOX_STRICT
2610 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2611 && offWhere == UINT32_MAX
2612 && uData == 0
2613#endif
2614 )
2615 {
2616#ifndef VBOX_STRICT
2617 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2618 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2619 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2620 if (idxLabel < pReNative->cLabels)
2621 return idxLabel;
2622#else
2623 for (uint32_t i = 0; i < cLabels; i++)
2624 if ( paLabels[i].enmType == enmType
2625 && paLabels[i].uData == uData)
2626 {
2627 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2628 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2629 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2630 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2631 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2632 return i;
2633 }
2634 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2635 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2636#endif
2637 }
2638
2639 /*
2640 * Make sure we've got room for another label.
2641 */
2642 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2643 { /* likely */ }
2644 else
2645 {
2646 uint32_t cNew = pReNative->cLabelsAlloc;
2647 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2648 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2649 cNew *= 2;
2650 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2651 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2652 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2653 pReNative->paLabels = paLabels;
2654 pReNative->cLabelsAlloc = cNew;
2655 }
2656
2657 /*
2658 * Define a new label.
2659 */
2660 paLabels[cLabels].off = offWhere;
2661 paLabels[cLabels].enmType = enmType;
2662 paLabels[cLabels].uData = uData;
2663 pReNative->cLabels = cLabels + 1;
2664
2665 Assert((unsigned)enmType < 64);
2666 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2667
2668 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2669 {
2670 Assert(uData == 0);
2671 pReNative->aidxUniqueLabels[enmType] = cLabels;
2672 }
2673
2674 if (offWhere != UINT32_MAX)
2675 {
2676#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2677 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2678 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2679#endif
2680 }
2681 return cLabels;
2682}
2683
2684
2685/**
2686 * Defines the location of an existing label.
2687 *
2688 * @param pReNative The native recompile state.
2689 * @param idxLabel The label to define.
2690 * @param offWhere The position.
2691 */
2692DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2693{
2694 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2695 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2696 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2697 pLabel->off = offWhere;
2698#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2699 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2700 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2701#endif
2702}
2703
2704
2705/**
2706 * Looks up a label.
2707 *
2708 * @returns Label ID if found, UINT32_MAX if not.
2709 */
2710static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2711 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2712{
2713 Assert((unsigned)enmType < 64);
2714 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2715 {
2716 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2717 return pReNative->aidxUniqueLabels[enmType];
2718
2719 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2720 uint32_t const cLabels = pReNative->cLabels;
2721 for (uint32_t i = 0; i < cLabels; i++)
2722 if ( paLabels[i].enmType == enmType
2723 && paLabels[i].uData == uData
2724 && ( paLabels[i].off == offWhere
2725 || offWhere == UINT32_MAX
2726 || paLabels[i].off == UINT32_MAX))
2727 return i;
2728 }
2729 return UINT32_MAX;
2730}
2731
2732
2733/**
2734 * Adds a fixup.
2735 *
2736 * @throws VBox status code (int) on failure.
2737 * @param pReNative The native recompile state.
2738 * @param offWhere The instruction offset of the fixup location.
2739 * @param idxLabel The target label ID for the fixup.
2740 * @param enmType The fixup type.
2741 * @param offAddend Fixup addend if applicable to the type. Default is 0.
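 *
 * Rough sketch of the typical forward-branch pattern (illustrative only;
 * offFixup/offTarget are placeholders and the fixup type depends on the host
 * architecture and the instruction being patched):
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else);
 *      // ... emit the branch and note the offset of its immediate field ...
 *      iemNativeAddFixup(pReNative, offFixup, idxLabel, kIemNativeFixupType_Rel32);
 *      // ... later, once the target position is known:
 *      iemNativeLabelDefine(pReNative, idxLabel, offTarget);
 * @endcode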
2742 */
2743DECL_HIDDEN_THROW(void)
2744iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2745 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2746{
2747 Assert(idxLabel <= UINT16_MAX);
2748 Assert((unsigned)enmType <= UINT8_MAX);
2749
2750 /*
2751 * Make sure we've room.
2752 */
2753 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2754 uint32_t const cFixups = pReNative->cFixups;
2755 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2756 { /* likely */ }
2757 else
2758 {
2759 uint32_t cNew = pReNative->cFixupsAlloc;
2760 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2761 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2762 cNew *= 2;
2763 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2764 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2765 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2766 pReNative->paFixups = paFixups;
2767 pReNative->cFixupsAlloc = cNew;
2768 }
2769
2770 /*
2771 * Add the fixup.
2772 */
2773 paFixups[cFixups].off = offWhere;
2774 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2775 paFixups[cFixups].enmType = enmType;
2776 paFixups[cFixups].offAddend = offAddend;
2777 pReNative->cFixups = cFixups + 1;
2778}
2779
2780
2781/**
2782 * Slow code path for iemNativeInstrBufEnsure.
2783 */
2784DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2785{
2786 /* Double the buffer size till we meet the request. */
2787 uint32_t cNew = pReNative->cInstrBufAlloc;
2788 AssertReturn(cNew > 0, NULL);
2789 do
2790 cNew *= 2;
2791 while (cNew < off + cInstrReq);
2792
2793 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2794#ifdef RT_ARCH_ARM64
2795 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2796#else
2797 uint32_t const cbMaxInstrBuf = _2M;
2798#endif
2799 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2800
2801 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2802 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2803
2804 pReNative->cInstrBufAlloc = cNew;
2805 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2806}
2807
2808#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2809
2810/**
2811 * Grows the static debug info array used during recompilation.
2812 *
2813 * @returns Pointer to the new debug info block; throws VBox status code on
2814 * failure, so no need to check the return value.
2815 */
2816DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2817{
2818 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2819 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2820 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2821 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2822 pReNative->pDbgInfo = pDbgInfo;
2823 pReNative->cDbgInfoAlloc = cNew;
2824 return pDbgInfo;
2825}
2826
2827
2828/**
2829 * Adds a new debug info uninitialized entry, returning the pointer to it.
2830 */
2831DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2832{
2833 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2834 { /* likely */ }
2835 else
2836 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2837 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2838}
2839
2840
2841/**
2842 * Debug Info: Adds a native offset record, if necessary.
2843 */
2844static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2845{
2846 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2847
2848 /*
2849 * Search backwards to see if we've got a similar record already.
2850 */
2851 uint32_t idx = pDbgInfo->cEntries;
2852 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2853 while (idx-- > idxStop)
2854 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2855 {
2856 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2857 return;
2858 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2859 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2860 break;
2861 }
2862
2863 /*
2864 * Add it.
2865 */
2866 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2867 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2868 pEntry->NativeOffset.offNative = off;
2869}
2870
2871
2872/**
2873 * Debug Info: Record info about a label.
2874 */
2875static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2876{
2877 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2878 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2879 pEntry->Label.uUnused = 0;
2880 pEntry->Label.enmLabel = (uint8_t)enmType;
2881 pEntry->Label.uData = uData;
2882}
2883
2884
2885/**
2886 * Debug Info: Record info about a threaded call.
2887 */
2888static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2889{
2890 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2891 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2892 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2893 pEntry->ThreadedCall.uUnused = 0;
2894 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2895}
2896
2897
2898/**
2899 * Debug Info: Record info about a new guest instruction.
2900 */
2901static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2902{
2903 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2904 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2905 pEntry->GuestInstruction.uUnused = 0;
2906 pEntry->GuestInstruction.fExec = fExec;
2907}
2908
2909
2910/**
2911 * Debug Info: Record info about guest register shadowing.
2912 */
2913static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2914 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2915{
2916 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2917 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2918 pEntry->GuestRegShadowing.uUnused = 0;
2919 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2920 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2921 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2922}
2923
2924#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2925
2926
2927/*********************************************************************************************************************************
2928* Register Allocator *
2929*********************************************************************************************************************************/
2930
2931/**
2932 * Register parameter indexes (indexed by argument number).
2933 */
2934DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2935{
2936 IEMNATIVE_CALL_ARG0_GREG,
2937 IEMNATIVE_CALL_ARG1_GREG,
2938 IEMNATIVE_CALL_ARG2_GREG,
2939 IEMNATIVE_CALL_ARG3_GREG,
2940#if defined(IEMNATIVE_CALL_ARG4_GREG)
2941 IEMNATIVE_CALL_ARG4_GREG,
2942# if defined(IEMNATIVE_CALL_ARG5_GREG)
2943 IEMNATIVE_CALL_ARG5_GREG,
2944# if defined(IEMNATIVE_CALL_ARG6_GREG)
2945 IEMNATIVE_CALL_ARG6_GREG,
2946# if defined(IEMNATIVE_CALL_ARG7_GREG)
2947 IEMNATIVE_CALL_ARG7_GREG,
2948# endif
2949# endif
2950# endif
2951#endif
2952};
2953
2954/**
2955 * Call register masks indexed by argument count.
2956 */
2957DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2958{
2959 0,
2960 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2961 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2962 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2963 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2964 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2965#if defined(IEMNATIVE_CALL_ARG4_GREG)
2966 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2967 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2968# if defined(IEMNATIVE_CALL_ARG5_GREG)
2969 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2970 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2971# if defined(IEMNATIVE_CALL_ARG6_GREG)
2972 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2973 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2974 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2975# if defined(IEMNATIVE_CALL_ARG7_GREG)
2976 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2977 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2978 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2979# endif
2980# endif
2981# endif
2982#endif
2983};
2984
2985#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2986/**
2987 * BP offset of the stack argument slots.
2988 *
2989 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2990 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2991 */
2992DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2993{
2994 IEMNATIVE_FP_OFF_STACK_ARG0,
2995# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2996 IEMNATIVE_FP_OFF_STACK_ARG1,
2997# endif
2998# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2999 IEMNATIVE_FP_OFF_STACK_ARG2,
3000# endif
3001# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3002 IEMNATIVE_FP_OFF_STACK_ARG3,
3003# endif
3004};
3005AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3006#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3007
3008/**
3009 * Info about shadowed guest register values.
3010 * @see IEMNATIVEGSTREG
3011 */
3012static struct
3013{
3014 /** Offset in VMCPU. */
3015 uint32_t off;
3016 /** The field size. */
3017 uint8_t cb;
3018 /** Name (for logging). */
3019 const char *pszName;
3020} const g_aGstShadowInfo[] =
3021{
3022#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3023 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3024 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3025 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3026 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3027 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3028 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3029 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3030 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3031 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3032 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3033 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3034 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3035 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3036 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3037 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3038 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3039 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3040 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3041 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3042 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3043 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3044 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3045 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3046 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3047 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3048 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3049 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3050 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3051 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3052 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3053 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3054 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3055 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3056 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3057 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3058 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3059#undef CPUMCTX_OFF_AND_SIZE
3060};
3061AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
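
/**
 * A small sketch of how g_aGstShadowInfo is typically consumed, assuming an
 * emitter context with pReNative, off and idxHstReg in scope (this mirrors
 * what iemNativeEmitLoadGprWithGstShadowReg further down does via its size
 * switch):
 * @code
 *      IEMNATIVEGSTREG const enmGstReg = (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX);
 *      Assert(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t));
 *      off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
 * @endcode
 */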
3062
3063
3064/** Host CPU general purpose register names. */
3065DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3066{
3067#ifdef RT_ARCH_AMD64
3068 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3069 #elif defined(RT_ARCH_ARM64)
3070 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3071 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3072#else
3073# error "port me"
3074#endif
3075};
3076
3077
3078DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3079 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3080{
3081 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3082
3083 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3084 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3085 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3086 return (uint8_t)idxReg;
3087}
3088
3089
3090/**
3091 * Tries to locate a suitable register in the given register mask.
3092 *
3093 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3094 * failed.
3095 *
3096 * @returns Host register number on success, returns UINT8_MAX on failure.
3097 */
3098static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3099{
3100 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3101 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3102 if (fRegs)
3103 {
3104 /** @todo pick better here: */
3105 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3106
3107 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3108 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3109 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3110 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3111
3112 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3113 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3114 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3115 return idxReg;
3116 }
3117 return UINT8_MAX;
3118}
3119
3120
3121/**
3122 * Locate a register, possibly freeing one up.
3123 *
3124 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3125 * failed.
3126 *
3127 * @returns Host register number on success. Returns UINT8_MAX if no register was
3128 * found; the caller is supposed to deal with this and raise an
3129 * allocation-type-specific status code (if desired).
3130 *
3131 * @throws VBox status code if we run into trouble spilling a variable or
3132 * recording debug info. Does NOT throw anything if we're out of
3133 * registers, though.
3134 */
3135static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3136 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3137{
3138 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3139 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3140
3141 /*
3142 * Try a freed register that's shadowing a guest register
3143 */
3144 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3145 if (fRegs)
3146 {
3147 unsigned const idxReg = (fPreferVolatile
3148 ? ASMBitFirstSetU32(fRegs)
3149 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3150 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
3151 - 1;
3152
3153 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3154 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3155 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3156 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3157
3158 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3159 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3160 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3161 return idxReg;
3162 }
3163
3164 /*
3165 * Try to free up a variable that's in a register.
3166 *
3167 * We do two rounds here, first evacuating variables that don't need to be
3168 * saved on the stack, then, in the second round, moving things to the stack.
3169 */
3170 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3171 {
3172 uint32_t fVars = pReNative->Core.bmVars;
3173 while (fVars)
3174 {
3175 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3176 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3177 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3178 && (RT_BIT_32(idxReg) & fRegMask)
3179 && ( iLoop == 0
3180 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3181 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3182 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3183 {
3184 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3185 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3186 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3187 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3188 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3189 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3190
3191 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3192 {
3193 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3194 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3195 }
3196
3197 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3198 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3199
3200 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3201 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3202 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3203 return idxReg;
3204 }
3205 fVars &= ~RT_BIT_32(idxVar);
3206 }
3207 }
3208
3209 return UINT8_MAX;
3210}
3211
3212
3213/**
3214 * Reassigns a variable to a different register specified by the caller.
3215 *
3216 * @returns The new code buffer position.
3217 * @param pReNative The native recompile state.
3218 * @param off The current code buffer position.
3219 * @param idxVar The variable index.
3220 * @param idxRegOld The old host register number.
3221 * @param idxRegNew The new host register number.
3222 * @param pszCaller The caller for logging.
3223 */
3224static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3225 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3226{
3227 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3228 RT_NOREF(pszCaller);
3229
3230 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3231
3232 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3233 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3234 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3235 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3236
3237 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3238 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3239 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3240 if (fGstRegShadows)
3241 {
3242 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3243 | RT_BIT_32(idxRegNew);
3244 while (fGstRegShadows)
3245 {
3246 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3247 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3248
3249 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3250 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3251 }
3252 }
3253
3254 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3255 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3256 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3257 return off;
3258}
3259
3260
3261/**
3262 * Moves a variable to a different register or spills it onto the stack.
3263 *
3264 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3265 * kinds can easily be recreated if needed later.
3266 *
3267 * @returns The new code buffer position.
3268 * @param pReNative The native recompile state.
3269 * @param off The current code buffer position.
3270 * @param idxVar The variable index.
3271 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3272 * call-volatile registers.
3273 */
3274static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3275 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3276{
3277 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3278 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3279 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3280
3281 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3282 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3283 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3284 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3285 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3286 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3287 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3288 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3289 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3290
3291
3292 /** @todo Add statistics on this. */
3293 /** @todo Implement basic variable liveness analysis (python) so variables
3294 * can be freed immediately once they are no longer used. As it stands, we may
3295 * be trashing registers and stack slots for dead variables. */
3296
3297 /*
3298 * First try move it to a different register, as that's cheaper.
3299 */
3300 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3301 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3302 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3303 if (fRegs)
3304 {
3305 /* Avoid using shadow registers, if possible. */
3306 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3307 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3308 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3309 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3310 }
3311
3312 /*
3313 * Otherwise we must spill the register onto the stack.
3314 */
3315 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3316 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3317 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3318 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3319
3320 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3321 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3322 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3323 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3324 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3325 return off;
3326}
3327
3328
3329/**
3330 * Allocates a temporary host general purpose register.
3331 *
3332 * This may emit code to save register content onto the stack in order to free
3333 * up a register.
3334 *
3335 * @returns The host register number; throws VBox status code on failure,
3336 * so no need to check the return value.
3337 * @param pReNative The native recompile state.
3338 * @param poff Pointer to the variable with the code buffer position.
3339 * This will be updated if we need to move a variable from
3340 * register to stack in order to satisfy the request.
3341 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3342 * registers (@c true, default) or the other way around
3343 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3344 */
3345DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3346{
3347 /*
3348 * Try to find a completely unused register, preferably a call-volatile one.
3349 */
3350 uint8_t idxReg;
3351 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3352 & ~pReNative->Core.bmHstRegsWithGstShadow
3353 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3354 if (fRegs)
3355 {
3356 if (fPreferVolatile)
3357 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3358 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3359 else
3360 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3361 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3362 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3363 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3364 }
3365 else
3366 {
3367 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3368 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3369 }
3370 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3371}
3372
3373
3374/**
3375 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3376 * registers.
3377 *
3378 * @returns The host register number; throws VBox status code on failure,
3379 * so no need to check the return value.
3380 * @param pReNative The native recompile state.
3381 * @param poff Pointer to the variable with the code buffer position.
3382 * This will be updated if we need to move a variable from
3383 * register to stack in order to satisfy the request.
3384 * @param fRegMask Mask of acceptable registers.
3385 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3386 * registers (@c true, default) or the other way around
3387 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3388 */
3389DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3390 bool fPreferVolatile /*= true*/)
3391{
3392 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3393 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3394
3395 /*
3396 * Try to find a completely unused register, preferably a call-volatile one.
3397 */
3398 uint8_t idxReg;
3399 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3400 & ~pReNative->Core.bmHstRegsWithGstShadow
3401 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3402 & fRegMask;
3403 if (fRegs)
3404 {
3405 if (fPreferVolatile)
3406 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3407 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3408 else
3409 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3410 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3411 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3412 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3413 }
3414 else
3415 {
3416 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3417 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3418 }
3419 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3420}
3421
3422
3423/**
3424 * Allocates a temporary register for loading an immediate value into.
3425 *
3426 * This will emit code to load the immediate, unless there happens to be an
3427 * unused register with the value already loaded.
3428 *
3429 * The caller must not modify the returned register; it is to be considered
3430 * read-only. Free it using iemNativeRegFreeTmpImm.
3431 *
3432 * @returns The host register number; throws VBox status code on failure, so no
3433 * need to check the return value.
3434 * @param pReNative The native recompile state.
3435 * @param poff Pointer to the variable with the code buffer position.
3436 * @param uImm The immediate value that the register must hold upon
3437 * return.
3438 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3439 * registers (@c true, default) or the other way around
3440 * (@c false).
3441 *
3442 * @note Reusing immediate values has not been implemented yet.
3443 */
3444DECL_HIDDEN_THROW(uint8_t)
3445iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3446{
3447 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3448 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3449 return idxReg;
3450}
3451
3452
3453/**
3454 * Marks host register @a idxHstReg as containing a shadow copy of guest
3455 * register @a enmGstReg.
3456 *
3457 * ASSUMES that the caller has made sure @a enmGstReg is not associated with any
3458 * host register before calling.
3459 */
3460DECL_FORCE_INLINE(void)
3461iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3462{
3463 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
3464 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3465 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3466
3467 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
3468 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
3469 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
3470 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3471#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3472 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3473 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
3474#else
3475 RT_NOREF(off);
3476#endif
3477}
3478
3479
3480/**
3481 * Clear any guest register shadow claims from @a idxHstReg.
3482 *
3483 * The register does not need to be shadowing any guest registers.
3484 */
3485DECL_FORCE_INLINE(void)
3486iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
3487{
3488 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3489 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3490 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3491 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3492 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3493
3494#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3495 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3496 if (fGstRegs)
3497 {
3498 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
3499 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3500 while (fGstRegs)
3501 {
3502 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3503 fGstRegs &= ~RT_BIT_64(iGstReg);
3504 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
3505 }
3506 }
3507#else
3508 RT_NOREF(off);
3509#endif
3510
3511 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3512 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3513 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3514}
3515
3516
3517/**
3518 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
3519 * and global overview flags.
3520 */
3521DECL_FORCE_INLINE(void)
3522iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3523{
3524 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3525 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3526 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
3527 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3528 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
3529 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3530 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3531
3532#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3533 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3534 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
3535#else
3536 RT_NOREF(off);
3537#endif
3538
3539 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3540 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
3541 if (!fGstRegShadowsNew)
3542 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3543 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
3544}
3545
3546
3547/**
3548 * Clear any guest register shadow claim for @a enmGstReg.
3549 */
3550DECL_FORCE_INLINE(void)
3551iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
3552{
3553 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3554 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3555 {
3556 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3557 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3558 }
3559}
3560
3561
3562/**
3563 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
3564 * as the new shadow of it.
3565 */
3566DECL_FORCE_INLINE(void)
3567iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
3568 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3569{
3570 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3571 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3572 {
3573 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
3574 if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
3575 return;
3576 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
3577 }
3578 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
3579}
3580
3581
3582/**
3583 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
3584 * to @a idxRegTo.
3585 */
3586DECL_FORCE_INLINE(void)
3587iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
3588 IEMNATIVEGSTREG enmGstReg, uint32_t off)
3589{
3590 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
3591 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
3592 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
3593 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
3594 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3595 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
3596 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
3597 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
3598 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
3599
3600 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
3601 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
3602 if (!fGstRegShadowsFrom)
3603 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
3604 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
3605 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
3606 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
3607#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3608 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3609 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
3610#else
3611 RT_NOREF(off);
3612#endif
3613}
3614
3615
3616/**
3617 * Allocates a temporary host general purpose register for keeping a guest
3618 * register value.
3619 *
3620 * Since a host register may already hold the guest register value, code to
3621 * load it is only emitted when that is not the case. Code may also
3622 * be emitted if we have to free up a register to satisfy the request.
3623 *
3624 * @returns The host register number; throws VBox status code on failure, so no
3625 * need to check the return value.
3626 * @param pReNative The native recompile state.
3627 * @param poff Pointer to the variable with the code buffer
3628 * position. This will be updated if we need to move a
3629 * variable from register to stack in order to satisfy
3630 * the request.
3631 * @param enmGstReg The guest register that is to be updated.
3632 * @param enmIntendedUse How the caller will be using the host register.
3633 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3634 */
3635DECL_HIDDEN_THROW(uint8_t)
3636iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3637 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
3638{
3639 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3640#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3641 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3642#endif
3643
3644 /*
3645 * First check if the guest register value is already in a host register.
3646 */
3647 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3648 {
3649 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3650 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3651 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3652 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3653
3654 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3655 {
3656 /*
3657 * If the register will trash the guest shadow copy, try find a
3658 * completely unused register we can use instead. If that fails,
3659 * we need to disassociate the host reg from the guest reg.
3660 */
3661 /** @todo would be nice to know if preserving the register is in any way helpful. */
3662 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3663 && ( ~pReNative->Core.bmHstRegs
3664 & ~pReNative->Core.bmHstRegsWithGstShadow
3665 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3666 {
3667 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
3668
3669 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3670
3671 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3672 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3673 g_apszIemNativeHstRegNames[idxRegNew]));
3674 idxReg = idxRegNew;
3675 }
3676 else
3677 {
3678 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3679 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3680 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3681 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3682 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3683 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3684 else
3685 {
3686 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3687 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3688 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3689 }
3690 }
3691 }
3692 else
3693 {
3694 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3695 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3696 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3697 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3698
3699 /*
3700 * Allocate a new register, copy the value and, if updating, the
3701 * guest shadow copy assignment to the new register.
3702 */
3703 /** @todo share register for readonly access. */
3704 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3705
3706 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3707 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3708
3709 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3710 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3711 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3712 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3713 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3714 else
3715 {
3716 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3717 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3718 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3719 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3720 }
3721 idxReg = idxRegNew;
3722 }
3723
3724#ifdef VBOX_STRICT
3725 /* Strict builds: Check that the value is correct. */
3726 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3727#endif
3728
3729 return idxReg;
3730 }
3731
3732 /*
3733 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3734 */
3735 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3736
3737 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3738 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3739
3740 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3741 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3742 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3743 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3744
3745 return idxRegNew;
3746}
3747
3748
3749/**
3750 * Allocates a temporary host general purpose register that already holds the
3751 * given guest register value.
3752 *
3753 * The use case for this function is places where the shadowing state cannot be
3754 * modified due to branching and such. This will fail if we don't have a
3755 * current shadow copy handy or if it's incompatible. The only code that will
3756 * be emitted here is value checking code in strict builds.
3757 *
3758 * The intended use can only be readonly!
3759 *
3760 * @returns The host register number, UINT8_MAX if not present.
3761 * @param pReNative The native recompile state.
3762 * @param poff Pointer to the instruction buffer offset.
3763 * Will be updated in strict builds if a register is
3764 * found.
3765 * @param enmGstReg The guest register that is to be read.
3766 * @note In strict builds, this may throw instruction buffer growth failures.
3767 * Non-strict builds will not throw anything.
3768 * @sa iemNativeRegAllocTmpForGuestReg
3769 */
3770DECL_HIDDEN_THROW(uint8_t)
3771iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3772{
3773 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3774
3775 /*
3776 * First check if the guest register value is already in a host register.
3777 */
3778 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3779 {
3780 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3781 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3782 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3783 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3784
3785 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3786 {
3787 /*
3788 * We only do readonly use here, so easy compared to the other
3789 * variant of this code.
3790 */
3791 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3792 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3793 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3794 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3795 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3796
3797#ifdef VBOX_STRICT
3798 /* Strict builds: Check that the value is correct. */
3799 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3800#else
3801 RT_NOREF(poff);
3802#endif
3803 return idxReg;
3804 }
3805 }
3806
3807 return UINT8_MAX;
3808}
3809
3810
3811DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
3812
3813
3814/**
3815 * Allocates argument registers for a function call.
3816 *
3817 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3818 * need to check the return value.
3819 * @param pReNative The native recompile state.
3820 * @param off The current code buffer offset.
3821 * @param cArgs The number of arguments the function call takes.
3822 */
3823DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3824{
3825 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3826 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3827 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3828 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3829
3830 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3831 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3832 else if (cArgs == 0)
3833 return off;
3834
3835 /*
3836 * Do we get lucky and all the registers are free and not shadowing anything?
3837 */
3838 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3839 for (uint32_t i = 0; i < cArgs; i++)
3840 {
3841 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3842 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3843 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3844 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3845 }
3846 /*
3847 * Okay, not lucky, so we have to free up the registers.
3848 */
3849 else
3850 for (uint32_t i = 0; i < cArgs; i++)
3851 {
3852 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3853 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3854 {
3855 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3856 {
3857 case kIemNativeWhat_Var:
3858 {
3859 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3860 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
3861 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3862 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3863 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3864
3865 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3866 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3867 else
3868 {
3869 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3870 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3871 }
3872 break;
3873 }
3874
3875 case kIemNativeWhat_Tmp:
3876 case kIemNativeWhat_Arg:
3877 case kIemNativeWhat_rc:
3878 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3879 default:
3880 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3881 }
3882
3883 }
3884 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3885 {
3886 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3887 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3888 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3889 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3890 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3891 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3892 }
3893 else
3894 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3895 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3896 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3897 }
3898 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3899 return off;
3900}
3901
3902
3903DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3904
3905
3906#if 0
3907/**
3908 * Frees a register assignment of any type.
3909 *
3910 * @param pReNative The native recompile state.
3911 * @param idxHstReg The register to free.
3912 *
3913 * @note Does not update variables.
3914 */
3915DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3916{
3917 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3918 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3919 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3920 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3921 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3922 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3923 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3924 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3925 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3926 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3927 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3928 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3929 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3930 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3931
3932 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3933 /* no flushing, right:
3934 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3935 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3936 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3937 */
3938}
3939#endif
3940
3941
3942/**
3943 * Frees a temporary register.
3944 *
3945 * Any shadow copies of guest registers assigned to the host register will not
3946 * be flushed by this operation.
3947 */
3948DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3949{
3950 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3951 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3952 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3953 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3954 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3955}
3956
3957
3958/**
3959 * Frees a temporary immediate register.
3960 *
3961 * It is assumed that the caller has not modified the register, so it still holds
3962 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3963 */
3964DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3965{
3966 iemNativeRegFreeTmp(pReNative, idxHstReg);
3967}
3968
3969
3970/**
3971 * Frees a register assigned to a variable.
3972 *
3973 * The register will be disassociated from the variable.
3974 */
3975DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3976{
3977 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3978 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3979 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3980 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3981 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
3982
3983 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3984 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3985 if (!fFlushShadows)
3986 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
3987 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3988 else
3989 {
3990 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3991 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3992 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3993 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3994 uint64_t fGstRegShadows = fGstRegShadowsOld;
3995 while (fGstRegShadows)
3996 {
3997 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3998 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3999
4000 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4001 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4002 }
4003 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4004 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4005 }
4006}
4007
4008
4009/**
4010 * Called right before emitting a call instruction to move anything important
4011 * out of call-volatile registers, free and flush the call-volatile registers,
4012 * optionally freeing argument variables.
4013 *
4014 * @returns New code buffer offset, UINT32_MAX on failure.
4015 * @param pReNative The native recompile state.
4016 * @param off The code buffer offset.
4017 * @param cArgs The number of arguments the function call takes.
4018 * It is presumed that the host registers for these have
4019 * already been allocated as arguments and won't need moving,
4020 * just freeing.
4021 * @param fKeepVars Mask of variables that should keep their register
4022 * assignments. Caller must take care to handle these.
4023 */
4024DECL_HIDDEN_THROW(uint32_t)
4025iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars = 0)
4026{
4027 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4028
4029 /* fKeepVars will reduce this mask. */
4030 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4031
4032 /*
4033 * Move anything important out of volatile registers.
4034 */
4035 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4036 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4037 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4038#ifdef IEMNATIVE_REG_FIXED_TMP0
4039 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4040#endif
4041 & ~g_afIemNativeCallRegs[cArgs];
4042
4043 fRegsToMove &= pReNative->Core.bmHstRegs;
4044 if (!fRegsToMove)
4045 { /* likely */ }
4046 else
4047 {
4048 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4049 while (fRegsToMove != 0)
4050 {
4051 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4052 fRegsToMove &= ~RT_BIT_32(idxReg);
4053
4054 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4055 {
4056 case kIemNativeWhat_Var:
4057 {
4058 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4059 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4060 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4061 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4062 if (!(RT_BIT_32(idxVar) & fKeepVars))
4063 {
4064 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4065 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4066 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4067 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4068 else
4069 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4070 }
4071 else
4072 fRegsToFree &= ~RT_BIT_32(idxReg);
4073 continue;
4074 }
4075
4076 case kIemNativeWhat_Arg:
4077 AssertMsgFailed(("What?!?: %u\n", idxReg));
4078 continue;
4079
4080 case kIemNativeWhat_rc:
4081 case kIemNativeWhat_Tmp:
4082 AssertMsgFailed(("Missing free: %u\n", idxReg));
4083 continue;
4084
4085 case kIemNativeWhat_FixedTmp:
4086 case kIemNativeWhat_pVCpuFixed:
4087 case kIemNativeWhat_pCtxFixed:
4088 case kIemNativeWhat_FixedReserved:
4089 case kIemNativeWhat_Invalid:
4090 case kIemNativeWhat_End:
4091 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4092 }
4093 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4094 }
4095 }
4096
4097 /*
4098 * Do the actual freeing.
4099 */
4100 if (pReNative->Core.bmHstRegs & fRegsToFree)
4101 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4102 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4103 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4104
4105 /* If there are guest register shadows in any call-volatile register, we
4106 have to clear the corresponding guest register masks for each register. */
4107 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4108 if (fHstRegsWithGstShadow)
4109 {
4110 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4111 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4112 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4113 do
4114 {
4115 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4116 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4117
4118 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4119 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4120 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4121 } while (fHstRegsWithGstShadow != 0);
4122 }
4123
4124 return off;
4125}
4126
4127
4128/**
4129 * Flushes a set of guest register shadow copies.
4130 *
4131 * This is usually done after calling a threaded function or a C-implementation
4132 * of an instruction.
4133 *
4134 * @param pReNative The native recompile state.
4135 * @param fGstRegs Set of guest registers to flush.
4136 */
4137DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4138{
4139 /*
4140 * Reduce the mask by what's currently shadowed
4141 */
4142 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4143 fGstRegs &= bmGstRegShadowsOld;
4144 if (fGstRegs)
4145 {
4146 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4147 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4148 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4149 if (bmGstRegShadowsNew)
4150 {
4151 /*
4152 * Partial.
4153 */
4154 do
4155 {
4156 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4157 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4158 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4159 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4160 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4161
4162 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4163 fGstRegs &= ~fInThisHstReg;
4164 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4165 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4166 if (!fGstRegShadowsNew)
4167 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4168 } while (fGstRegs != 0);
4169 }
4170 else
4171 {
4172 /*
4173 * Clear all.
4174 */
4175 do
4176 {
4177 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4178 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4179 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4180 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4181 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4182
4183 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4184 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4185 } while (fGstRegs != 0);
4186 pReNative->Core.bmHstRegsWithGstShadow = 0;
4187 }
4188 }
4189}
4190
4191
4192/**
4193 * Flushes delayed write of a specific guest register.
4194 *
4195 * This must be called prior to calling CImpl functions and any helpers that use
4196 * the guest state (like raising exceptions) and such.
4197 *
4198 * This optimization has not yet been implemented. The first target would be
4199 * RIP updates, since these are the most common ones.
4200 */
4201DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4202 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4203{
4204 RT_NOREF(pReNative, enmClass, idxReg);
4205 return off;
4206}
4207
4208
4209/**
4210 * Flushes any delayed guest register writes.
4211 *
4212 * This must be called prior to calling CImpl functions and any helpers that use
4213 * the guest state (like raising exceptions) and such.
4214 *
4215 * This optimization has not yet been implemented. The first target would be
4216 * RIP updates, since these are the most common ones.
4217 */
4218DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4219{
4220 RT_NOREF(pReNative, off);
4221 return off;
4222}
4223
4224
4225#ifdef VBOX_STRICT
4226/**
4227 * Does internal register allocator sanity checks.
4228 */
4229static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
4230{
4231 /*
4232 * Iterate host registers building a guest shadowing set.
4233 */
4234 uint64_t bmGstRegShadows = 0;
4235 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
4236 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
4237 while (bmHstRegsWithGstShadow)
4238 {
4239 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
4240 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4241 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4242
4243 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4244 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
4245 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
4246 bmGstRegShadows |= fThisGstRegShadows;
4247 while (fThisGstRegShadows)
4248 {
4249 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
4250 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
4251 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
4252 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
4253 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
4254 }
4255 }
4256 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
4257 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
4258 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
4259
4260 /*
4261 * Now the other way around, checking the guest to host index array.
4262 */
4263 bmHstRegsWithGstShadow = 0;
4264 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
4265 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4266 while (bmGstRegShadows)
4267 {
4268 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
4269 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4270 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
4271
4272 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4273 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
4274 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
4275 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
4276 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4277 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4278 }
4279 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
4280 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
4281 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
4282}
4283#endif
4284
4285
4286/*********************************************************************************************************************************
4287* Code Emitters (larger snippets) *
4288*********************************************************************************************************************************/
4289
4290/**
4291 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
4292 * extending to 64-bit width.
4293 *
4294 * @returns New code buffer offset on success; throws VBox status code on failure.
4295 * @param   pReNative   The native recompile state.
4296 * @param off The current code buffer position.
4297 * @param idxHstReg The host register to load the guest register value into.
4298 * @param enmGstReg The guest register to load.
4299 *
4300 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
4301 * that is something the caller needs to do if applicable.
4302 */
4303DECL_HIDDEN_THROW(uint32_t)
4304iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
4305{
4306 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
4307 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4308
4309 switch (g_aGstShadowInfo[enmGstReg].cb)
4310 {
4311 case sizeof(uint64_t):
4312 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4313 case sizeof(uint32_t):
4314 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4315 case sizeof(uint16_t):
4316 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4317#if 0 /* not present in the table. */
4318 case sizeof(uint8_t):
4319 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4320#endif
4321 default:
4322 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4323 }
4324}
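
/* Illustrative use (a sketch, not a call site taken from this file): the register
   allocator would typically do something like
       off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, kIemNativeGstReg_Pc);
   and then separately record idxHstReg as shadowing the guest register in its own
   bookkeeping, since - as noted above - this emitter only produces the load. */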
4325
4326
4327#ifdef VBOX_STRICT
4328/**
4329 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
4330 *
4331 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4332 * Trashes EFLAGS on AMD64.
4333 */
4334static uint32_t
4335iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
4336{
4337# ifdef RT_ARCH_AMD64
4338 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4339
4340 /* rol reg64, 32 */
4341 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4342 pbCodeBuf[off++] = 0xc1;
4343 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4344 pbCodeBuf[off++] = 32;
4345
4346 /* test reg32, ffffffffh */
4347 if (idxReg >= 8)
4348 pbCodeBuf[off++] = X86_OP_REX_B;
4349 pbCodeBuf[off++] = 0xf7;
4350 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4351 pbCodeBuf[off++] = 0xff;
4352 pbCodeBuf[off++] = 0xff;
4353 pbCodeBuf[off++] = 0xff;
4354 pbCodeBuf[off++] = 0xff;
4355
4356 /* je/jz +1 */
4357 pbCodeBuf[off++] = 0x74;
4358 pbCodeBuf[off++] = 0x01;
4359
4360 /* int3 */
4361 pbCodeBuf[off++] = 0xcc;
4362
4363 /* rol reg64, 32 */
4364 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4365 pbCodeBuf[off++] = 0xc1;
4366 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4367 pbCodeBuf[off++] = 32;
4368
4369# elif defined(RT_ARCH_ARM64)
4370 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4371 /* lsr tmp0, reg64, #32 */
4372 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
4373 /* cbz tmp0, +1 */
4374 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4375 /* brk #0x1100 */
4376 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
4377
4378# else
4379# error "Port me!"
4380# endif
4381 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4382 return off;
4383}
4384#endif /* VBOX_STRICT */
4385
4386
4387#ifdef VBOX_STRICT
4388/**
4389 * Emits code that checks that the content of register @a idxReg is the same
4390 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
4391 * instruction if that's not the case.
4392 *
4393 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4394 * Trashes EFLAGS on AMD64.
4395 */
4396static uint32_t
4397iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
4398{
4399# ifdef RT_ARCH_AMD64
4400 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4401
4402 /* cmp reg, [mem] */
4403 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
4404 {
4405 if (idxReg >= 8)
4406 pbCodeBuf[off++] = X86_OP_REX_R;
4407 pbCodeBuf[off++] = 0x38;
4408 }
4409 else
4410 {
4411 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
4412 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
4413 else
4414 {
4415 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
4416 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4417 else
4418 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
4419 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
4420 if (idxReg >= 8)
4421 pbCodeBuf[off++] = X86_OP_REX_R;
4422 }
4423 pbCodeBuf[off++] = 0x39;
4424 }
4425 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
4426
4427 /* je/jz +1 */
4428 pbCodeBuf[off++] = 0x74;
4429 pbCodeBuf[off++] = 0x01;
4430
4431 /* int3 */
4432 pbCodeBuf[off++] = 0xcc;
4433
4434 /* For values smaller than the register size, we must check that the rest
4435 of the register is all zeros. */
4436 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
4437 {
4438 /* test reg64, imm32 */
4439 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
4440 pbCodeBuf[off++] = 0xf7;
4441 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
4442 pbCodeBuf[off++] = 0;
4443 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
4444 pbCodeBuf[off++] = 0xff;
4445 pbCodeBuf[off++] = 0xff;
4446
4447 /* je/jz +1 */
4448 pbCodeBuf[off++] = 0x74;
4449 pbCodeBuf[off++] = 0x01;
4450
4451 /* int3 */
4452 pbCodeBuf[off++] = 0xcc;
4453 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4454 }
4455 else
4456 {
4457 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4458 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
4459 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
4460 }
4461
4462# elif defined(RT_ARCH_ARM64)
4463 /* mov TMP0, [gstreg] */
4464 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
4465
4466 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4467 /* sub tmp0, tmp0, idxReg */
4468 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
4469 /* cbz tmp0, +1 */
4470 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
4471 /* brk #0x1000+enmGstReg */
4472 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
4473 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4474
4475# else
4476# error "Port me!"
4477# endif
4478 return off;
4479}
4480#endif /* VBOX_STRICT */
4481
4482
4483#ifdef VBOX_STRICT
4484/**
4485 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
4486 * important bits.
4487 *
4488 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
4489 * Trashes EFLAGS on AMD64.
4490 */
4491static uint32_t
4492iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
4493{
4494 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4495 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
4496 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
4497 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
4498
4499#ifdef RT_ARCH_AMD64
4500 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4501
4502 /* je/jz +1 */
4503 pbCodeBuf[off++] = 0x74;
4504 pbCodeBuf[off++] = 0x01;
4505
4506 /* int3 */
4507 pbCodeBuf[off++] = 0xcc;
4508
4509# elif defined(RT_ARCH_ARM64)
4510 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4511
4512 /* b.eq +1 */
4513 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
4514 /* brk #0x2000 */
4515 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
4516
4517# else
4518# error "Port me!"
4519# endif
4520 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4521
4522 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4523 return off;
4524}
4525#endif /* VBOX_STRICT */
4526
4527
4528/**
4529 * Emits code for checking the return code of a call and rcPassUp, returning
4530 * from the code if either is non-zero.
4531 */
4532DECL_HIDDEN_THROW(uint32_t)
4533iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
4534{
4535#ifdef RT_ARCH_AMD64
4536 /*
4537 * AMD64: eax = call status code.
4538 */
4539
4540 /* edx = rcPassUp */
4541 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4542# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4543 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
4544# endif
4545
4546 /* edx = eax | rcPassUp */
4547 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4548 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
4549 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
4550 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4551
4552 /* Jump to non-zero status return path. */
4553 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
4554
4555 /* done. */
4556
4557#elif RT_ARCH_ARM64
4558 /*
4559 * ARM64: w0 = call status code.
4560 */
4561# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4562 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
4563# endif
4564 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
4565
4566 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4567
4568 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
4569
4570 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4571 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
4572 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
4573
4574#else
4575# error "port me"
4576#endif
4577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4578 return off;
4579}
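
/* The emitted instructions amount to roughly the following C logic (sketch only):
       if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
           goto NonZeroRetOrPassUp;   // rc fiddling + TB exit, see iemNativeEmitRcFiddling
   where rcCall is the status code returned by the preceding call in eax / w0. */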
4580
4581
4582/**
4583 * Emits code to check if the content of @a idxAddrReg is a canonical address,
4584 * raising a \#GP(0) if it isn't.
4585 *
4586 * @returns New code buffer offset; throws VBox status code on failure.
4587 * @param pReNative The native recompile state.
4588 * @param off The code buffer offset.
4589 * @param idxAddrReg The host register with the address to check.
4590 * @param idxInstr The current instruction.
4591 */
4592DECL_HIDDEN_THROW(uint32_t)
4593iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
4594{
4595 /*
4596 * Make sure we don't have any outstanding guest register writes as we may
4597     * raise a #GP(0), and all guest registers must be up to date in CPUMCTX.
4598 */
4599 off = iemNativeRegFlushPendingWrites(pReNative, off);
4600
4601#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4602 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4603#else
4604 RT_NOREF(idxInstr);
4605#endif
4606
4607#ifdef RT_ARCH_AMD64
4608 /*
4609 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
4610 * return raisexcpt();
4611     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
4612 */
4613 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4614
4615 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
4616 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
4617 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
4618 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
4619 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4620
4621 iemNativeRegFreeTmp(pReNative, iTmpReg);
4622
4623#elif defined(RT_ARCH_ARM64)
4624 /*
4625 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
4626 * return raisexcpt();
4627 * ----
4628 * mov x1, 0x800000000000
4629 * add x1, x0, x1
4630 * cmp xzr, x1, lsr 48
4631 * b.ne .Lraisexcpt
4632 */
4633 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4634
4635 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
4636 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
4637 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
4638 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4639
4640 iemNativeRegFreeTmp(pReNative, iTmpReg);
4641
4642#else
4643# error "Port me"
4644#endif
4645 return off;
4646}
4647
4648
4649/**
4650 * Emits code to check if the content of @a idxAddrReg is within the limit of
4651 * idxSegReg, raising a \#GP(0) if it isn't.
4652 *
4653 * @returns New code buffer offset; throws VBox status code on error.
4654 * @param pReNative The native recompile state.
4655 * @param off The code buffer offset.
4656 * @param idxAddrReg The host register (32-bit) with the address to
4657 * check.
4658 * @param idxSegReg The segment register (X86_SREG_XXX) to check
4659 * against.
4660 * @param idxInstr The current instruction.
4661 */
4662DECL_HIDDEN_THROW(uint32_t)
4663iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4664 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
4665{
4666 /*
4667 * Make sure we don't have any outstanding guest register writes as we may
4668     * raise a #GP(0), and all guest registers must be up to date in CPUMCTX.
4669 */
4670 off = iemNativeRegFlushPendingWrites(pReNative, off);
4671
4672#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4673 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4674#else
4675 RT_NOREF(idxInstr);
4676#endif
4677
4678 /** @todo implement expand down/whatnot checking */
4679 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
4680
4681 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4682 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
4683 kIemNativeGstRegUse_ForUpdate);
4684
4685 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
4686 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
4687
4688 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
4689 return off;
4690}
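
/* Rough C equivalent of the emitted check (CS only for now, see the @todo above),
   assuming the usual CPUMSELREG layout with a u32Limit member:
       if (uAddr32 > pVCpu->cpum.GstCtx.cs.u32Limit)
           goto RaiseGp0;   // #GP(0) + TB exit
   i.e. an unsigned 32-bit compare against the shadowed segment limit followed by a 'ja'. */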
4691
4692
4693/**
4694 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
4695 *
4696 * @returns The flush mask.
4697 * @param fCImpl The IEM_CIMPL_F_XXX flags.
4698 * @param fGstShwFlush The starting flush mask.
4699 */
4700DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
4701{
4702 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
4703 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
4704 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
4705 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
4706 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
4707 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
4708 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
4709 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
4710 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
4711 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
4712 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
4713 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
4714 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4715 return fGstShwFlush;
4716}
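
/* Worked example: fCImpl = IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_RFLAGS adds the CS
   selector/base/limit shadow bits and the EFLAGS shadow bit to whatever the caller
   already had in fGstShwFlush, while a plain IEM_CIMPL_F_BRANCH_STACK only adds xSP. */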
4717
4718
4719/**
4720 * Emits a call to a CImpl function or something similar.
4721 */
4722DECL_HIDDEN_THROW(uint32_t)
4723iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
4724 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
4725{
4726 /*
4727     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
4728 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
4729 */
4730 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
4731 fGstShwFlush
4732 | RT_BIT_64(kIemNativeGstReg_Pc)
4733 | RT_BIT_64(kIemNativeGstReg_EFlags));
4734 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4735
4736 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4737
4738 /*
4739 * Load the parameters.
4740 */
4741#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
4742    /* Special handling for the hidden VBOXSTRICTRC pointer. */
4743 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4744 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4745 if (cAddParams > 0)
4746 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
4747 if (cAddParams > 1)
4748 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
4749 if (cAddParams > 2)
4750 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
4751 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4752
4753#else
4754 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4755 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4756 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
4757 if (cAddParams > 0)
4758 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
4759 if (cAddParams > 1)
4760 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
4761 if (cAddParams > 2)
4762# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
4763 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
4764# else
4765 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
4766# endif
4767#endif
4768
4769 /*
4770 * Make the call.
4771 */
4772 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
4773
4774#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4775 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4776#endif
4777
4778 /*
4779 * Check the status code.
4780 */
4781 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4782}
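
/* At runtime the generated code behaves roughly like this sketch (ignoring the
   hidden VBOXSTRICTRC return pointer used on Windows with VBOXSTRICTRC_STRICT_ENABLED):
       rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);  // trailing params per cAddParams
       if (rcStrict != VINF_SUCCESS || pVCpu->iem.s.rcPassUp != VINF_SUCCESS)
           goto NonZeroRetOrPassUp;
   This only describes the emitted behaviour; it is not code compiled here. */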
4783
4784
4785/**
4786 * Emits a call to a threaded worker function.
4787 */
4788static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
4789{
4790 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
4791 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
4792
4793#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4794 /* The threaded function may throw / long jmp, so set current instruction
4795 number if we're counting. */
4796 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4797#endif
4798
4799 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
4800
4801#ifdef RT_ARCH_AMD64
4802 /* Load the parameters and emit the call. */
4803# ifdef RT_OS_WINDOWS
4804# ifndef VBOXSTRICTRC_STRICT_ENABLED
4805 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4806 if (cParams > 0)
4807 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
4808 if (cParams > 1)
4809 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
4810 if (cParams > 2)
4811 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
4812# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
4813 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
4814 if (cParams > 0)
4815 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
4816 if (cParams > 1)
4817 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
4818 if (cParams > 2)
4819 {
4820 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
4821 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
4822 }
4823 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4824# endif /* VBOXSTRICTRC_STRICT_ENABLED */
4825# else
4826 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4827 if (cParams > 0)
4828 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
4829 if (cParams > 1)
4830 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
4831 if (cParams > 2)
4832 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
4833# endif
4834
4835 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4836
4837# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4838 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4839# endif
4840
4841#elif RT_ARCH_ARM64
4842 /*
4843 * ARM64:
4844 */
4845 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4846 if (cParams > 0)
4847 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
4848 if (cParams > 1)
4849 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
4850 if (cParams > 2)
4851 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
4852
4853 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4854
4855#else
4856# error "port me"
4857#endif
4858
4859 /*
4860 * Check the status code.
4861 */
4862 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
4863
4864 return off;
4865}
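
/* Sketch of what the emitted code does at runtime (parameters beyond cParams are
   simply not loaded):
       rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                      pCallEntry->auParams[0], pCallEntry->auParams[1], pCallEntry->auParams[2]);
   followed by the same rc / rcPassUp check as for CImpl calls above. */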
4866
4867
4868/**
4869 * Emits the code at the CheckBranchMiss label.
4870 */
4871static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4872{
4873 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
4874 if (idxLabel != UINT32_MAX)
4875 {
4876 iemNativeLabelDefine(pReNative, idxLabel, off);
4877
4878 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
4879 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4880 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
4881
4882 /* jump back to the return sequence. */
4883 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4884 }
4885 return off;
4886}
4887
4888
4889/**
4890 * Emits the code at the NeedCsLimChecking label.
4891 */
4892static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4893{
4894 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
4895 if (idxLabel != UINT32_MAX)
4896 {
4897 iemNativeLabelDefine(pReNative, idxLabel, off);
4898
4899 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
4900 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4901 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
4902
4903 /* jump back to the return sequence. */
4904 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4905 }
4906 return off;
4907}
4908
4909
4910/**
4911 * Emits the code at the ObsoleteTb label.
4912 */
4913static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4914{
4915 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
4916 if (idxLabel != UINT32_MAX)
4917 {
4918 iemNativeLabelDefine(pReNative, idxLabel, off);
4919
4920 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
4921 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4922 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
4923
4924 /* jump back to the return sequence. */
4925 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4926 }
4927 return off;
4928}
4929
4930
4931/**
4932 * Emits the code at the RaiseGP0 label.
4933 */
4934static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4935{
4936 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
4937 if (idxLabel != UINT32_MAX)
4938 {
4939 iemNativeLabelDefine(pReNative, idxLabel, off);
4940
4941 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
4942 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4943 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
4944
4945 /* jump back to the return sequence. */
4946 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4947 }
4948 return off;
4949}
4950
4951
4952/**
4953 * Emits the code at the ReturnWithFlags label (returns
4954 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
4955 */
4956static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4957{
4958 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
4959 if (idxLabel != UINT32_MAX)
4960 {
4961 iemNativeLabelDefine(pReNative, idxLabel, off);
4962
4963 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
4964
4965 /* jump back to the return sequence. */
4966 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4967 }
4968 return off;
4969}
4970
4971
4972/**
4973 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
4974 */
4975static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4976{
4977 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
4978 if (idxLabel != UINT32_MAX)
4979 {
4980 iemNativeLabelDefine(pReNative, idxLabel, off);
4981
4982 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
4983
4984 /* jump back to the return sequence. */
4985 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4986 }
4987 return off;
4988}
4989
4990
4991/**
4992 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
4993 */
4994static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4995{
4996 /*
4997 * Generate the rc + rcPassUp fiddling code if needed.
4998 */
4999 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5000 if (idxLabel != UINT32_MAX)
5001 {
5002 iemNativeLabelDefine(pReNative, idxLabel, off);
5003
5004 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5005#ifdef RT_ARCH_AMD64
5006# ifdef RT_OS_WINDOWS
5007# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5008 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5009# endif
5010 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5011 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5012# else
5013 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5014 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5015# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5016 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5017# endif
5018# endif
5019# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5020 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5021# endif
5022
5023#else
5024 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5025 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5026 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5027#endif
5028
5029 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5030 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5031 }
5032 return off;
5033}
5034
5035
5036/**
5037 * Emits a standard epilog.
5038 */
5039static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5040{
5041 *pidxReturnLabel = UINT32_MAX;
5042
5043 /*
5044 * Successful return, so clear the return register (eax, w0).
5045 */
5046 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
5047
5048 /*
5049 * Define label for common return point.
5050 */
5051 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5052 *pidxReturnLabel = idxReturn;
5053
5054 /*
5055 * Restore registers and return.
5056 */
5057#ifdef RT_ARCH_AMD64
5058 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5059
5060    /* Reposition rsp at the r15 restore point. */
5061 pbCodeBuf[off++] = X86_OP_REX_W;
5062 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5063 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5064 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5065
5066 /* Pop non-volatile registers and return */
5067 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5068 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5069 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5070 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5071 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5072 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5073 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5074 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5075# ifdef RT_OS_WINDOWS
5076 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5077 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5078# endif
5079 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5080 pbCodeBuf[off++] = 0xc9; /* leave */
5081 pbCodeBuf[off++] = 0xc3; /* ret */
5082 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5083
5084#elif RT_ARCH_ARM64
5085 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5086
5087 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
5088 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5089 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5090 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5091 IEMNATIVE_FRAME_VAR_SIZE / 8);
5092 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5093 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5094 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5095 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5096 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5097 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5098 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5099 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5100 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5101 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5102 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5103 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5104
5105 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5106 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5107 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5108 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5109
5110 /* retab / ret */
5111# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5112 if (1)
5113 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5114 else
5115# endif
5116 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5117
5118#else
5119# error "port me"
5120#endif
5121 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5122
5123 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5124}
5125
5126
5127/**
5128 * Emits a standard prolog.
5129 */
5130static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5131{
5132#ifdef RT_ARCH_AMD64
5133 /*
5134 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5135 * reserving 64 bytes for stack variables plus 4 non-register argument
5136     * slots. Fixed register assignment: xBX = pVCpu;
5137 *
5138 * Since we always do the same register spilling, we can use the same
5139 * unwind description for all the code.
5140 */
5141 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5142 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5143 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5144 pbCodeBuf[off++] = 0x8b;
5145 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5146 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5147 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5148# ifdef RT_OS_WINDOWS
5149 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5150 pbCodeBuf[off++] = 0x8b;
5151 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5152 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5153 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5154# else
5155 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5156 pbCodeBuf[off++] = 0x8b;
5157 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5158# endif
5159 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5160 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5161 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5162 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5163 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5164 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5165 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5166 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5167
5168 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5169 X86_GREG_xSP,
5170 IEMNATIVE_FRAME_ALIGN_SIZE
5171 + IEMNATIVE_FRAME_VAR_SIZE
5172 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5173 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5174 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5175 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5176 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5177
5178#elif RT_ARCH_ARM64
5179 /*
5180 * We set up a stack frame exactly like on x86, only we have to push the
5181     * return address ourselves here. We save all non-volatile registers.
5182 */
5183 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5184
5185# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been
5186                      * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
5187                      * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether
5188                      * it's in any way conditional, so we just emit this instruction now and hope for the best... */
5189 /* pacibsp */
5190 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5191# endif
5192
5193 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5194 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5195 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5196 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5197 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5198 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5199 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5200 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5201 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5202 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5203 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5204 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5205 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5206 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5207 /* Save the BP and LR (ret address) registers at the top of the frame. */
5208 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5209 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5210 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5211 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5212 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5213 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5214
5215 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5216 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5217
5218 /* mov r28, r0 */
5219 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
5220 /* mov r27, r1 */
5221 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
5222
5223#else
5224# error "port me"
5225#endif
5226 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5227 return off;
5228}
5229
5230
5231
5232
5233/*********************************************************************************************************************************
5234* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
5235*********************************************************************************************************************************/
5236
5237#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
5238 { \
5239 Assert(pReNative->Core.bmVars == 0); \
5240 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
5241 Assert(pReNative->Core.bmStack == 0); \
5242 pReNative->fMc = (a_fMcFlags); \
5243 pReNative->fCImpl = (a_fCImplFlags); \
5244 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
5245
5246/** We have to get to the end in recompilation mode, as otherwise we won't
5247 * generate code for all the IEM_MC_IF_XXX branches. */
5248#define IEM_MC_END() \
5249 iemNativeVarFreeAll(pReNative); \
5250 } return off
5251
5252
5253
5254/*********************************************************************************************************************************
5255* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
5256*********************************************************************************************************************************/
5257
5258#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
5259 pReNative->fMc = 0; \
5260 pReNative->fCImpl = (a_fFlags); \
5261 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
5262
5263
5264#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5265 pReNative->fMc = 0; \
5266 pReNative->fCImpl = (a_fFlags); \
5267 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
5268
5269DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5270 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5271 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
5272{
5273 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
5274}
5275
5276
5277#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5278 pReNative->fMc = 0; \
5279 pReNative->fCImpl = (a_fFlags); \
5280 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5281 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
5282
5283DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5284 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5285 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
5286{
5287 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
5288}
5289
5290
5291#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5292 pReNative->fMc = 0; \
5293 pReNative->fCImpl = (a_fFlags); \
5294 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
5295 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
5296
5297DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5298 uint8_t idxInstr, uint64_t a_fGstShwFlush,
5299 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
5300 uint64_t uArg2)
5301{
5302 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
5303}
5304
5305
5306
5307/*********************************************************************************************************************************
5308* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
5309*********************************************************************************************************************************/
5310
5311/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
5312 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
5313DECL_INLINE_THROW(uint32_t)
5314iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5315{
5316 /*
5317     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
5318     * return with a special status code and let the execution loop deal with
5319     * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
5320     * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
5321     * could continue w/o interruption, it will probably drop into the
5322     * debugger, so it's not worth the effort of trying to service it here and
5323     * we just lump it in with the handling of the others.
5324     *
5325     * To simplify the code and the register state management even more (wrt
5326     * the immediate in the AND operation), we always update the flags and skip
5327     * the conditional jump an extra check would otherwise require.
5328 */
5329 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
5330 <= UINT32_MAX);
5331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5332 kIemNativeGstRegUse_ForUpdate);
5333 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
5334 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
5335 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
5336 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
5337 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5338
5339 /* Free but don't flush the EFLAGS register. */
5340 iemNativeRegFreeTmp(pReNative, idxEflReg);
5341
5342 return off;
5343}
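
/* Sketch of the equivalent C logic (see iemRegFinishClearingRF for the real thing;
   the exact EFLAGS member access below is illustrative only):
       uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
       if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
           return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;        // via the ReturnWithFlags label
       pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW); */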
5344
5345
5346#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
5347 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5348
5349#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
5350 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
5351 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5352
5353/** Same as iemRegAddToRip64AndFinishingNoFlags. */
5354DECL_INLINE_THROW(uint32_t)
5355iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5356{
5357 /* Allocate a temporary PC register. */
5358 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5359
5360 /* Perform the addition and store the result. */
5361 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
5362 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5363
5364 /* Free but don't flush the PC register. */
5365 iemNativeRegFreeTmp(pReNative, idxPcReg);
5366
5367 return off;
5368}
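
/* The guest-visible effect is simply (sketch): pVCpu->cpum.GstCtx.rip += cbInstr;
   performed on the shadowed PC register and written back to CPUMCTX. */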
5369
5370
5371#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
5372 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5373
5374#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
5375 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
5376 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5377
5378/** Same as iemRegAddToEip32AndFinishingNoFlags. */
5379DECL_INLINE_THROW(uint32_t)
5380iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5381{
5382 /* Allocate a temporary PC register. */
5383 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5384
5385 /* Perform the addition and store the result. */
5386 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5387 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5388
5389 /* Free but don't flush the PC register. */
5390 iemNativeRegFreeTmp(pReNative, idxPcReg);
5391
5392 return off;
5393}
5394
5395
5396#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
5397 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
5398
5399#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
5400 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
5401 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5402
5403/** Same as iemRegAddToIp16AndFinishingNoFlags. */
5404DECL_INLINE_THROW(uint32_t)
5405iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
5406{
5407 /* Allocate a temporary PC register. */
5408 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5409
5410 /* Perform the addition and store the result. */
5411 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
5412 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5413 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5414
5415 /* Free but don't flush the PC register. */
5416 iemNativeRegFreeTmp(pReNative, idxPcReg);
5417
5418 return off;
5419}
5420
5421
5422
5423/*********************************************************************************************************************************
5424* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
5425*********************************************************************************************************************************/
5426
5427#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
5428 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5429 (a_enmEffOpSize), pCallEntry->idxInstr)
5430
5431#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5432 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
5433 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5434
5435#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
5436 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5437 IEMMODE_16BIT, pCallEntry->idxInstr)
5438
5439#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
5440 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
5441 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5442
5443#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
5444 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5445 IEMMODE_64BIT, pCallEntry->idxInstr)
5446
5447#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
5448 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
5449 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5450
5451/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
5452 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
5453 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
5454DECL_INLINE_THROW(uint32_t)
5455iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5456 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5457{
5458 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
5459
5460 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5461 off = iemNativeRegFlushPendingWrites(pReNative, off);
5462
5463 /* Allocate a temporary PC register. */
5464 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5465
5466 /* Perform the addition. */
5467 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
5468
5469 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
5470 {
5471 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5472 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5473 }
5474 else
5475 {
5476 /* Just truncate the result to 16-bit IP. */
5477 Assert(enmEffOpSize == IEMMODE_16BIT);
5478 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5479 }
5480 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5481
5482 /* Free but don't flush the PC register. */
5483 iemNativeRegFreeTmp(pReNative, idxPcReg);
5484
5485 return off;
5486}
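
/* Rough C equivalent of the emitted code (IEM_IS_CANONICAL used for illustration only):
       uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + offDisp;
       if (enmEffOpSize == IEMMODE_64BIT)
       {
           if (!IEM_IS_CANONICAL(uNewRip))
               goto RaiseGp0;                       // #GP(0) + TB exit
       }
       else
           uNewRip &= UINT16_MAX;                   // IEMMODE_16BIT
       pVCpu->cpum.GstCtx.rip = uNewRip; */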
5487
5488
5489#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
5490 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
5491 (a_enmEffOpSize), pCallEntry->idxInstr)
5492
5493#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
5494 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
5495 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5496
5497#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
5498 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
5499 IEMMODE_16BIT, pCallEntry->idxInstr)
5500
5501#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
5502 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
5503 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5504
5505#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
5506 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
5507 IEMMODE_32BIT, pCallEntry->idxInstr)
5508
5509#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
5510 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
5511 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5512
5513/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
5514 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
5515 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
5516DECL_INLINE_THROW(uint32_t)
5517iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
5518 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
5519{
5520 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
5521
5522 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5523 off = iemNativeRegFlushPendingWrites(pReNative, off);
5524
5525 /* Allocate a temporary PC register. */
5526 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5527
5528 /* Perform the addition. */
5529 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5530
5531 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
5532 if (enmEffOpSize == IEMMODE_16BIT)
5533 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5534
5535 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
5536 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5537
5538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5539
5540 /* Free but don't flush the PC register. */
5541 iemNativeRegFreeTmp(pReNative, idxPcReg);
5542
5543 return off;
5544}
5545
5546
5547#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
5548 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
5549
5550#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
5551 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
5552 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5553
5554#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
5555 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
5556
5557#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
5558 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
5559 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5560
5561#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
5562 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
5563
5564#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
5565 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
5566 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5567
5568/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
5569DECL_INLINE_THROW(uint32_t)
5570iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5571 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
5572{
5573 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5574 off = iemNativeRegFlushPendingWrites(pReNative, off);
5575
5576 /* Allocate a temporary PC register. */
5577 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5578
5579 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
5580 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
5581 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
5582 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5583 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5584
5585 /* Free but don't flush the PC register. */
5586 iemNativeRegFreeTmp(pReNative, idxPcReg);
5587
5588 return off;
5589}
5590
5591
5592
5593/*********************************************************************************************************************************
5594* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                       *
5595*********************************************************************************************************************************/
5596
5597/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
5598#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
5599 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5600
5601/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
5602#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
5603 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5604
5605/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
5606#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
5607 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
5608
5609/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
5610 * clears flags. */
5611#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
5612 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
5613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5614
5615/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
5616 * clears flags. */
5617#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
5618 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
5619 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5620
5621/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
5622 * clears flags. */
5623#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
5624 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
5625 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5626
5627#undef IEM_MC_SET_RIP_U16_AND_FINISH
5628
5629
5630/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
5631#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
5632 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5633
5634/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
5635#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
5636 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
5637
5638/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
5639 * clears flags. */
5640#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
5641 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
5642 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5643
5644/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
5645 * and clears flags. */
5646#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
5647 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
5648 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5649
5650#undef IEM_MC_SET_RIP_U32_AND_FINISH
5651
5652
5653/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
5654#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
5655 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
5656
5657/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
5658 * and clears flags. */
5659#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
5660 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
5661 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
5662
5663#undef IEM_MC_SET_RIP_U64_AND_FINISH
5664
5665
5666/** Same as iemRegRipJumpU16AndFinishNoFlags,
5667 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
5668DECL_INLINE_THROW(uint32_t)
5669iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
5670 uint8_t idxInstr, uint8_t cbVar)
5671{
5672 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
5673 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
5674
5675 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
5676 off = iemNativeRegFlushPendingWrites(pReNative, off);
5677
5678 /* Get a register with the new PC loaded from idxVarPc.
5679       Note! This ASSUMES that the high bits of the GPR are zeroed. */
5680 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
5681
5682 /* Check limit (may #GP(0) + exit TB). */
5683 if (!f64Bit)
5684 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
5685 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
5686 else if (cbVar > sizeof(uint32_t))
5687 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
5688
5689 /* Store the result. */
5690 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5691
5692    /** @todo implicitly free the variable? */
5693
5694 return off;
5695}
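
/*
 * Summary sketch (comment only) of how the IEM_MC_SET_RIP_UXX macros above map
 * onto iemNativeEmitRipJumpNoFlags and which check the new PC gets:
 *
 *      ..._U16_..._PC16 / _PC32     f64Bit=false, cbVar=2    ->  CS limit check
 *      ..._U32_..._PC32             f64Bit=false, cbVar=4    ->  CS limit check
 *      ..._U16/_U32_..._PC64        f64Bit=true,  cbVar=2/4  ->  no extra check
 *      ..._U64_..._PC64             f64Bit=true,  cbVar=8    ->  canonical check
 *
 * In all cases the value is stored to cpum.GstCtx.rip; EFLAGS checking is left
 * to the _WITH_FLAGS wrappers.
 */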
5696
5697
5698
5699/*********************************************************************************************************************************
5700* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
5701*********************************************************************************************************************************/
5702
5703/**
5704 * Pushes an IEM_MC_IF_XXX onto the condition stack.
5705 *
5706 * @returns Pointer to the condition stack entry.
5707 * @throws  VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting gets too deep.
5708 */
5709DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
5710{
5711 uint32_t const idxStack = pReNative->cCondDepth;
5712 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
5713
5714 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
5715 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
5716
5717 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
5718 pEntry->fInElse = false;
5719 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
5720 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
5721
5722 return pEntry;
5723}
5724
5725
5726/**
5727 * Start of the if-block, snapshotting the register and variable state.
5728 */
5729DECL_INLINE_THROW(void)
5730iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
5731{
5732 Assert(offIfBlock != UINT32_MAX);
5733 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5734 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5735 Assert(!pEntry->fInElse);
5736
5737    /* Define the start of the IF block if requested or for disassembly purposes. */
5738 if (idxLabelIf != UINT32_MAX)
5739 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
5740#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5741 else
5742 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
5743#else
5744 RT_NOREF(offIfBlock);
5745#endif
5746
5747 /* Copy the initial state so we can restore it in the 'else' block. */
5748 pEntry->InitialState = pReNative->Core;
5749}
5750
5751
5752#define IEM_MC_ELSE() } while (0); \
5753 off = iemNativeEmitElse(pReNative, off); \
5754 do {
5755
5756/** Emits code related to IEM_MC_ELSE. */
5757DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5758{
5759 /* Check sanity and get the conditional stack entry. */
5760 Assert(off != UINT32_MAX);
5761 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5762 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5763 Assert(!pEntry->fInElse);
5764
5765 /* Jump to the endif */
5766 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
5767
5768 /* Define the else label and enter the else part of the condition. */
5769 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5770 pEntry->fInElse = true;
5771
5772 /* Snapshot the core state so we can do a merge at the endif and restore
5773 the snapshot we took at the start of the if-block. */
5774 pEntry->IfFinalState = pReNative->Core;
5775 pReNative->Core = pEntry->InitialState;
5776
5777 return off;
5778}
5779
5780
5781#define IEM_MC_ENDIF() } while (0); \
5782 off = iemNativeEmitEndIf(pReNative, off)
5783
5784/** Emits code related to IEM_MC_ENDIF. */
5785DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5786{
5787 /* Check sanity and get the conditional stack entry. */
5788 Assert(off != UINT32_MAX);
5789 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
5790 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
5791
5792    /*
5793     * Now we have to find common ground with the core state at the end of the
5794     * if-block (or the initial state when there is no else-block).  Use the
5795     * smallest common denominator and just drop anything not the same in both.
5796     */
5797 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
5798 * which is why we're doing this at the end of the else-block.
5799     * But we'd need more info about the future for that to be worth the effort. */
5800 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
5801 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
5802 {
5803 /* shadow guest stuff first. */
5804 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
5805 if (fGstRegs)
5806 {
5807 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
5808 do
5809 {
5810 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5811 fGstRegs &= ~RT_BIT_64(idxGstReg);
5812
5813 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5814 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
5815 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
5816 {
5817 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
5818 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
5819 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
5820 }
5821 } while (fGstRegs);
5822 }
5823 else
5824 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
5825
5826 /* Check variables next. For now we must require them to be identical
5827 or stuff we can recreate. */
5828 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
5829 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
5830 if (fVars)
5831 {
5832 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
5833 do
5834 {
5835 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
5836 fVars &= ~RT_BIT_32(idxVar);
5837
5838 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
5839 {
5840 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
5841 continue;
5842 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5843 {
5844 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5845 if (idxHstReg != UINT8_MAX)
5846 {
5847 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5848 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5849 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
5850 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5851 }
5852 continue;
5853 }
5854 }
5855 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
5856 continue;
5857
5858 /* Irreconcilable, so drop it. */
5859 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5860 if (idxHstReg != UINT8_MAX)
5861 {
5862 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5863 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5864 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
5865 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
5866 }
5867 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
5868 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5869 } while (fVars);
5870 }
5871
5872        /* Finally, check that the host register allocations match. */
5873 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
5874 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
5875 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
5876 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
5877 }
5878
5879 /*
5880 * Define the endif label and maybe the else one if we're still in the 'if' part.
5881 */
5882 if (!pEntry->fInElse)
5883 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
5884 else
5885 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
5886 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
5887
5888 /* Pop the conditional stack.*/
5889 pReNative->cCondDepth -= 1;
5890
5891 return off;
5892}
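
/*
 * Illustrative sketch only (not compiled): what an IEM_MC conditional turns into
 * with the macros above.  A microcode block along the lines of
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 *
 * expands to
 *
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); do { {
 *          ...
 *      } } while (0); off = iemNativeEmitElse(pReNative, off); do { {
 *          ...
 *      } } while (0); off = iemNativeEmitEndIf(pReNative, off);
 *
 * so both branch bodies are emitted inline, while the Else/EndIf emitters insert
 * the jump and labels and reconcile the register/variable state of the two paths.
 */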
5893
5894
5895#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
5896 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
5897 do {
5898
5899/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
5900DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5901{
5902 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5903
5904 /* Get the eflags. */
5905 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5906 kIemNativeGstRegUse_ReadOnly);
5907
5908 /* Test and jump. */
5909 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5910
5911 /* Free but don't flush the EFlags register. */
5912 iemNativeRegFreeTmp(pReNative, idxEflReg);
5913
5914 /* Make a copy of the core state now as we start the if-block. */
5915 iemNativeCondStartIfBlock(pReNative, off);
5916
5917 return off;
5918}
5919
5920
5921#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
5922 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
5923 do {
5924
5925/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
5926DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
5927{
5928 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5929
5930 /* Get the eflags. */
5931 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5932 kIemNativeGstRegUse_ReadOnly);
5933
5934 /* Test and jump. */
5935 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
5936
5937 /* Free but don't flush the EFlags register. */
5938 iemNativeRegFreeTmp(pReNative, idxEflReg);
5939
5940 /* Make a copy of the core state now as we start the if-block. */
5941 iemNativeCondStartIfBlock(pReNative, off);
5942
5943 return off;
5944}
5945
5946
5947#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
5948 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
5949 do {
5950
5951/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
5952DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5953{
5954 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5955
5956 /* Get the eflags. */
5957 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5958 kIemNativeGstRegUse_ReadOnly);
5959
5960 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5961 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5962
5963 /* Test and jump. */
5964 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5965
5966 /* Free but don't flush the EFlags register. */
5967 iemNativeRegFreeTmp(pReNative, idxEflReg);
5968
5969 /* Make a copy of the core state now as we start the if-block. */
5970 iemNativeCondStartIfBlock(pReNative, off);
5971
5972 return off;
5973}
5974
5975
5976#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
5977 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
5978 do {
5979
5980/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
5981DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5982{
5983 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5984
5985 /* Get the eflags. */
5986 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5987 kIemNativeGstRegUse_ReadOnly);
5988
5989 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5990 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5991
5992 /* Test and jump. */
5993 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5994
5995 /* Free but don't flush the EFlags register. */
5996 iemNativeRegFreeTmp(pReNative, idxEflReg);
5997
5998 /* Make a copy of the core state now as we start the if-block. */
5999 iemNativeCondStartIfBlock(pReNative, off);
6000
6001 return off;
6002}
6003
6004
6005#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6006 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6007 do {
6008
6009#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6010 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6011 do {
6012
6013/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6014DECL_INLINE_THROW(uint32_t)
6015iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6016 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6017{
6018 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6019
6020 /* Get the eflags. */
6021 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6022 kIemNativeGstRegUse_ReadOnly);
6023
6024 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6025 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6026
6027 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6028 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6029 Assert(iBitNo1 != iBitNo2);
6030
6031#ifdef RT_ARCH_AMD64
6032 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6033
6034 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6035 if (iBitNo1 > iBitNo2)
6036 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6037 else
6038 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6039 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6040
6041#elif defined(RT_ARCH_ARM64)
6042 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6043 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6044
6045 /* and tmpreg, eflreg, #1<<iBitNo1 */
6046 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6047
6048 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6049 if (iBitNo1 > iBitNo2)
6050 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6051 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6052 else
6053 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6054 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6055
6056 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6057
6058#else
6059# error "Port me"
6060#endif
6061
6062 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6063 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6064 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6065
6066 /* Free but don't flush the EFlags and tmp registers. */
6067 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6068 iemNativeRegFreeTmp(pReNative, idxEflReg);
6069
6070 /* Make a copy of the core state now as we start the if-block. */
6071 iemNativeCondStartIfBlock(pReNative, off);
6072
6073 return off;
6074}
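
/*
 * Worked example (comment only) of the AND/shift/XOR trick above, assuming the
 * typical IEM_MC_IF_EFL_BITS_EQ(X86_EFL_SF, X86_EFL_OF) use (SF=bit 7, OF=bit 11):
 *
 *      tmp   = efl & X86_EFL_SF;      // isolate SF in bit 7
 *      tmp <<= 11 - 7;                // line SF up with OF in bit 11
 *      tmp  ^= efl;                   // bit 11 of tmp is now SF ^ OF
 *
 * Bit 11 set means SF != OF, so the EQ variant jumps to the else label when it
 * is set; the NE variant (fInverted) simply inverts that final branch condition.
 */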
6075
6076
6077#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6078 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6079 do {
6080
6081#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6082 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6083 do {
6084
6085/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6086 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6087DECL_INLINE_THROW(uint32_t)
6088iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6089 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6090{
6091 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6092
6093    /* We need an extra if-block label for the inverted variant. */
6094 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6095 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6096
6097 /* Get the eflags. */
6098 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6099 kIemNativeGstRegUse_ReadOnly);
6100
6101 /* Translate the flag masks to bit numbers. */
6102 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6103 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6104
6105 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6106 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6107 Assert(iBitNo1 != iBitNo);
6108
6109 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6110 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6111 Assert(iBitNo2 != iBitNo);
6112 Assert(iBitNo2 != iBitNo1);
6113
6114#ifdef RT_ARCH_AMD64
6115 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6116#elif defined(RT_ARCH_ARM64)
6117 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6118#endif
6119
6120 /* Check for the lone bit first. */
6121 if (!fInverted)
6122 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6123 else
6124 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6125
6126 /* Then extract and compare the other two bits. */
6127#ifdef RT_ARCH_AMD64
6128 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6129 if (iBitNo1 > iBitNo2)
6130 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6131 else
6132 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6133 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6134
6135#elif defined(RT_ARCH_ARM64)
6136 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6137
6138 /* and tmpreg, eflreg, #1<<iBitNo1 */
6139 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6140
6141 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6142 if (iBitNo1 > iBitNo2)
6143 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6144 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6145 else
6146 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6147 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6148
6149 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6150
6151#else
6152# error "Port me"
6153#endif
6154
6155 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6156 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6157 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6158
6159 /* Free but don't flush the EFlags and tmp registers. */
6160 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6161 iemNativeRegFreeTmp(pReNative, idxEflReg);
6162
6163 /* Make a copy of the core state now as we start the if-block. */
6164 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
6165
6166 return off;
6167}
6168
6169
6170#define IEM_MC_IF_CX_IS_NZ() \
6171 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
6172 do {
6173
6174/** Emits code for IEM_MC_IF_CX_IS_NZ. */
6175DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6176{
6177 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6178
6179 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6180 kIemNativeGstRegUse_ReadOnly);
6181 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6182 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6183
6184 iemNativeCondStartIfBlock(pReNative, off);
6185 return off;
6186}
6187
6188
6189#define IEM_MC_IF_ECX_IS_NZ() \
6190 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
6191 do {
6192
6193#define IEM_MC_IF_RCX_IS_NZ() \
6194 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
6195 do {
6196
6197/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
6198DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
6199{
6200 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6201
6202 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6203 kIemNativeGstRegUse_ReadOnly);
6204 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6205 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6206
6207 iemNativeCondStartIfBlock(pReNative, off);
6208 return off;
6209}
6210
6211
6212#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6213 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
6214 do {
6215
6216#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6217 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
6218 do {
6219
6220/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6221DECL_INLINE_THROW(uint32_t)
6222iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
6223{
6224 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6225
6226 /* We have to load both RCX and EFLAGS before we can start branching,
6227 otherwise we'll end up in the else-block with an inconsistent
6228 register allocator state.
6229 Doing EFLAGS first as it's more likely to be loaded, right? */
6230 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6231 kIemNativeGstRegUse_ReadOnly);
6232 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6233 kIemNativeGstRegUse_ReadOnly);
6234
6235 /** @todo we could reduce this to a single branch instruction by spending a
6236 * temporary register and some setnz stuff. Not sure if loops are
6237 * worth it. */
6238 /* Check CX. */
6239 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
6240
6241 /* Check the EFlags bit. */
6242 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6243 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6244 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6245 !fCheckIfSet /*fJmpIfSet*/);
6246
6247 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6248 iemNativeRegFreeTmp(pReNative, idxEflReg);
6249
6250 iemNativeCondStartIfBlock(pReNative, off);
6251 return off;
6252}
6253
6254
6255#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6256 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
6257 do {
6258
6259#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6260 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
6261 do {
6262
6263#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
6264 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
6265 do {
6266
6267#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
6268 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
6269 do {
6270
6271/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
6272 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
6273 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
6274 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
6275DECL_INLINE_THROW(uint32_t)
6276iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6277 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
6278{
6279 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6280
6281 /* We have to load both RCX and EFLAGS before we can start branching,
6282 otherwise we'll end up in the else-block with an inconsistent
6283 register allocator state.
6284 Doing EFLAGS first as it's more likely to be loaded, right? */
6285 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6286 kIemNativeGstRegUse_ReadOnly);
6287 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
6288 kIemNativeGstRegUse_ReadOnly);
6289
6290 /** @todo we could reduce this to a single branch instruction by spending a
6291 * temporary register and some setnz stuff. Not sure if loops are
6292 * worth it. */
6293 /* Check RCX/ECX. */
6294 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
6295
6296 /* Check the EFlags bit. */
6297 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6298 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6299 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
6300 !fCheckIfSet /*fJmpIfSet*/);
6301
6302 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
6303 iemNativeRegFreeTmp(pReNative, idxEflReg);
6304
6305 iemNativeCondStartIfBlock(pReNative, off);
6306 return off;
6307}
6308
6309
6310
6311/*********************************************************************************************************************************
6312* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6313*********************************************************************************************************************************/
6314/** Number of hidden arguments for CIMPL calls.
6315 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
6316#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6317# define IEM_CIMPL_HIDDEN_ARGS 3
6318#else
6319# define IEM_CIMPL_HIDDEN_ARGS 2
6320#endif
6321
6322#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
6323 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
6324
6325#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
6326 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
6327
6328#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
6329 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
6330
6331#define IEM_MC_LOCAL(a_Type, a_Name) \
6332 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
6333
6334#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
6335 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
6336
6337
6338/**
6339 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
6340 */
6341DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
6342{
6343 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
6344 return IEM_CIMPL_HIDDEN_ARGS;
6345 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
6346 return 1;
6347 return 0;
6348}
6349
6350
6351/**
6352 * Internal work that allocates a variable with kind set to
6353 * kIemNativeVarKind_Invalid and no current stack allocation.
6354 *
6355 * The kind will either be set by the caller or later when the variable is first
6356 * assigned a value.
6357 */
6358static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6359{
6360 Assert(cbType > 0 && cbType <= 64);
6361 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6362 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6363 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6364 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6365 pReNative->Core.aVars[idxVar].cbVar = cbType;
6366 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6367 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6368 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6369 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6370 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6371 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6372 pReNative->Core.aVars[idxVar].u.uValue = 0;
6373 return idxVar;
6374}
6375
6376
6377/**
6378 * Internal work that allocates an argument variable w/o setting enmKind.
6379 */
6380static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6381{
6382 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6383 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6384 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6385
6386 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6387 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
6388 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6389 return idxVar;
6390}
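
/*
 * Illustrative sketch only: because of the hidden argument adjustment above, a
 * CIMPL-calling MC block declaring (hypothetical name) IEM_MC_ARG(uint16_t, u16Value, 0)
 * records the variable in Core.aidxArgVars[0 + IEM_CIMPL_HIDDEN_ARGS], i.e. slot 2
 * normally or slot 3 on Windows/AMD64 strict-status builds, keeping the first
 * slots reserved for the hidden arguments the C-implementation call prepends.
 */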
6391
6392
6393/**
6394 * Gets the stack slot for a stack variable, allocating one if necessary.
6395 *
6396 * Calling this function implies that the stack slot will contain a valid
6397 * variable value. The caller deals with any register currently assigned to the
6398 * variable, typically by spilling it into the stack slot.
6399 *
6400 * @returns The stack slot number.
6401 * @param pReNative The recompiler state.
6402 * @param idxVar The variable.
6403 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6404 */
6405DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6406{
6407 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6408 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6409
6410 /* Already got a slot? */
6411 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6412 if (idxStackSlot != UINT8_MAX)
6413 {
6414 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6415 return idxStackSlot;
6416 }
6417
6418 /*
6419 * A single slot is easy to allocate.
6420 * Allocate them from the top end, closest to BP, to reduce the displacement.
6421 */
6422 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
6423 {
6424 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6425 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6426 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6427 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6428        Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
6429 return (uint8_t)iSlot;
6430 }
6431
6432 /*
6433 * We need more than one stack slot.
6434 *
6435 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6436 */
6437 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6438 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
6439 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
6440 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
6441 uint32_t bmStack = ~pReNative->Core.bmStack;
6442 while (bmStack != UINT32_MAX)
6443 {
6444/** @todo allocate from the top to reduce BP displacement. */
6445 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6446 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6447 if (!(iSlot & fBitAlignMask))
6448 {
6449 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6450 {
6451 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6452 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
6453                Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
6454 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
6455 return (uint8_t)iSlot;
6456 }
6457 }
6458 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6459 }
6460 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6461}
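
/*
 * Worked example (comment only) for the multi-slot path above: a 32 byte variable
 * gets fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = 3 and
 * fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, so the loop searches for
 * four consecutive free 8-byte slots starting at a slot index that is a multiple
 * of four.  Likewise 16 bytes yields masks 1/0x3 and 64 bytes yields 7/0xff.
 */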
6462
6463
6464/**
6465 * Changes the variable to a stack variable.
6466 *
6467 * Currently this is only possible to do the first time the variable is used;
6468 * switching later could be implemented but hasn't been done.
6469 *
6470 * @param pReNative The recompiler state.
6471 * @param idxVar The variable.
6472 * @throws VERR_IEM_VAR_IPE_2
6473 */
6474static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6475{
6476 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6477 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6478 {
6479 /* We could in theory transition from immediate to stack as well, but it
6480 would involve the caller doing work storing the value on the stack. So,
6481 till that's required we only allow transition from invalid. */
6482 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6483 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6484 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6485 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
6486
6487 /* Note! We don't allocate a stack slot here, that's only done when a
6488 slot is actually needed to hold a variable value. */
6489 }
6490}
6491
6492
6493/**
6494 * Sets the variable to a constant value.
6495 *
6496 * This does not require stack storage as we know the value and can always
6497 * reload it, unless of course it's referenced.
6498 *
6499 * @param pReNative The recompiler state.
6500 * @param idxVar The variable.
6501 * @param uValue The immediate value.
6502 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6503 */
6504static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6505{
6506 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6507 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
6508 {
6509 /* Only simple transitions for now. */
6510 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6511 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6512 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
6513 }
6514 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6515
6516 pReNative->Core.aVars[idxVar].u.uValue = uValue;
6517 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
6518 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
6519 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
6520}
6521
6522
6523/**
6524 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6525 *
6526 * This does not require stack storage as we know the value and can always
6527 * reload it. Loading is postponed till needed.
6528 *
6529 * @param pReNative The recompiler state.
6530 * @param idxVar The variable.
6531 * @param idxOtherVar The variable to take the (stack) address of.
6532 *
6533 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6534 */
6535static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6536{
6537 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6538 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6539
6540 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6541 {
6542 /* Only simple transitions for now. */
6543 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6544 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6545 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6546 }
6547 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6548
6549 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
6550
6551 /* Update the other variable, ensure it's a stack variable. */
6552 /** @todo handle variables with const values... that'll go boom now. */
6553 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6554 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
6555}
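
/*
 * Illustrative sketch only (hypothetical variable names): this is what backs
 * IEM_MC_ARG_LOCAL_REF, e.g. IEM_MC_LOCAL(uint32_t, u32Dst) followed by
 * IEM_MC_ARG_LOCAL_REF(uint32_t *, pu32Dst, u32Dst, 1) turns pu32Dst into a
 * VarRef to u32Dst and forces u32Dst to become a stack variable, since the
 * argument will eventually be materialized as a pointer to that stack slot.
 */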
6556
6557
6558/**
6559 * Sets the variable to a reference (pointer) to a guest register reference.
6560 *
6561 * This does not require stack storage as we know the value and can always
6562 * reload it. Loading is postponed till needed.
6563 *
6564 * @param pReNative The recompiler state.
6565 * @param idxVar The variable.
6566 * @param   enmRegClass     The class of guest registers to reference.
6567 * @param idxReg The register within @a enmRegClass to reference.
6568 *
6569 * @throws VERR_IEM_VAR_IPE_2
6570 */
6571static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6572 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6573{
6574 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6575
6576 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
6577 {
6578 /* Only simple transitions for now. */
6579 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6580 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6581 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
6582 }
6583 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6584
6585 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
6586 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
6587}
6588
6589
6590DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6591{
6592 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6593}
6594
6595
6596DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6597{
6598 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
6599
6600 /* Since we're using a generic uint64_t value type, we must truncate it if
6601       the variable is smaller, otherwise we may end up with too large a value when
6602       scaling up an imm8 w/ sign-extension.
6603
6604 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6605       in the bios, bx=1) when running on arm, because clang expects 16-bit
6606 register parameters to have bits 16 and up set to zero. Instead of
6607 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
6608 CF value in the result. */
6609 switch (cbType)
6610 {
6611 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6612 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6613 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6614 }
6615 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6616 return idxVar;
6617}
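
/*
 * Illustrative sketch only: the masking above means that e.g. passing the
 * sign-extended constant UINT64_C(0xffffffffffffffff) for a 2 byte argument
 * records it as 0xffff, so no stray bits can leak into bits 16..63 of the host
 * register the argument is later loaded into.
 */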
6618
6619
6620DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6621{
6622 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6623 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6624 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6625 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6626
6627 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6628 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
6629 return idxArgVar;
6630}
6631
6632
6633DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6634{
6635 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6636 /* Don't set to stack now, leave that to the first use as for instance
6637 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6638 return idxVar;
6639}
6640
6641
6642DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6643{
6644 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6645
6646 /* Since we're using a generic uint64_t value type, we must truncate it if
6647       the variable is smaller, otherwise we may end up with too large a value when
6648       scaling up an imm8 w/ sign-extension. */
6649 switch (cbType)
6650 {
6651 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6652 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6653 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6654 }
6655 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6656 return idxVar;
6657}
6658
6659
6660/**
6661 * Releases the variable's register.
6662 *
6663 * The register must have been previously acquired by calling
6664 * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
6665 * iemNativeVarRegisterSetAndAcquire().
6666 */
6667DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6668{
6669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6670 Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
6671 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6672}
6673
6674
6675/**
6676 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6677 * fixed till we call iemNativeVarRegisterRelease.
6678 *
6679 * @returns The host register number.
6680 * @param pReNative The recompiler state.
6681 * @param idxVar The variable.
6682 * @param poff Pointer to the instruction buffer offset.
6683 * In case a register needs to be freed up or the value
6684 * loaded off the stack.
6685 * @param fInitialized Set if the variable must already have been initialized.
6686 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6687 * the case.
6688 * @param idxRegPref Preferred register number or UINT8_MAX.
6689 */
6690DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6691 bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX)
6692{
6693 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6694 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
6695 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6696
6697 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6698 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6699 {
6700 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
6701 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6702 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6703 return idxReg;
6704 }
6705
6706 /*
6707 * If the kind of variable has not yet been set, default to 'stack'.
6708 */
6709 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
6710 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
6711 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
6712 iemNativeVarSetKindToStack(pReNative, idxVar);
6713
6714 /*
6715     * We have to allocate a register for the variable, even if it's a stack one,
6716     * as we don't know if there are modifications being made to it before it's
6717     * finalized (todo: analyze and insert hints about that?).
6718     *
6719     * If we can, we try to get the correct register for argument variables.  This
6720     * is assuming that most argument variables are fetched as close as possible
6721     * to the actual call, so that there aren't any interfering hidden calls
6722     * (memory accesses, etc.) in between.
6723     *
6724     * If we cannot, or it's a local variable, we make sure no argument registers
6725     * that will be used by this MC block will be allocated here, and we always
6726     * prefer non-volatile registers to avoid needing to spill stuff for internal
6727     * calls.
6728 */
6729 /** @todo Detect too early argument value fetches and warn about hidden
6730 * calls causing less optimal code to be generated in the python script. */
6731
6732 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6733 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6734 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6735 {
6736 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6737 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6738 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6739 }
6740    else if (   idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6741 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6742 {
6743 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6744 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6745 & ~pReNative->Core.bmHstRegsWithGstShadow
6746 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6747 & fNotArgsMask;
6748 if (fRegs)
6749 {
6750            /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
6751 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6752 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6753 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6754 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6755 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6756 }
6757 else
6758 {
6759 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6760 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6761 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6762 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6763 }
6764 }
6765 else
6766 {
6767 idxReg = idxRegPref;
6768 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6769 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
6770 }
6771 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6772 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6773
6774 /*
6775 * Load it off the stack if we've got a stack slot.
6776 */
6777 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6778 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6779 {
6780 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6781 switch (pReNative->Core.aVars[idxVar].cbVar)
6782 {
6783 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6784 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6785 case 3: AssertFailed(); RT_FALL_THRU();
6786 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6787 default: AssertFailed(); RT_FALL_THRU();
6788 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6789 }
6790 }
6791 else
6792 {
6793 Assert(idxStackSlot == UINT8_MAX);
6794 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6795 }
6796 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6797 return idxReg;
6798}
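
/*
 * Summary sketch (comment only) of the allocation order implemented above:
 *      1. the variable already has a register          -> just mark it acquired;
 *      2. it is argument N and that call reg is free   -> take the call register;
 *      3. a free preferred register was supplied       -> take the preference;
 *      4. otherwise pick a free register, preferring non-volatile ones and
 *         avoiding this block's argument registers, falling back to
 *         iemNativeRegAllocFindFree (which may spill) as the last resort.
 * If the variable already has a stack slot, its value is then reloaded into the
 * chosen register.
 */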
6799
6800
6801/**
6802 * The value of variable @a idxVar will be written in full to the @a enmGstReg
6803 * guest register.
6804 *
6805 * This function makes sure there is a register for it and sets it to be the
6806 * current shadow copy of @a enmGstReg.
6807 *
6808 * @returns The host register number.
6809 * @param pReNative The recompiler state.
6810 * @param idxVar The variable.
6811 * @param enmGstReg The guest register this variable will be written to
6812 * after this call.
6813 * @param poff Pointer to the instruction buffer offset.
6814 * In case a register needs to be freed up or if the
6815 * variable content needs to be loaded off the stack.
6816 *
6817 * @note We DO NOT expect @a idxVar to be an argument variable,
6818 *       because this function is only used in the commit stage of an
6819 *       instruction.
6820 */
6821DECL_HIDDEN_THROW(uint8_t)
6822iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
6823{
6824 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6825 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6826 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
6827 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
6828 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
6829 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
6830 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
6831 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6832
6833 /*
6834 * This shouldn't ever be used for arguments, unless it's in a weird else
6835 * branch that doesn't do any calling, and even then it's questionable.
6836 *
6837 * However, in case someone writes crazy wrong MC code and does register
6838 * updates before making calls, just use the regular register allocator to
6839 * ensure we get a register suitable for the intended argument number.
6840 */
6841 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
6842
6843 /*
6844 * If there is already a register for the variable, we transfer/set the
6845 * guest shadow copy assignment to it.
6846 */
6847 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
6848 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6849 {
6850 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
6851 {
6852 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
6853 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
6854 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
6855 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
6856 }
6857 else
6858 {
6859 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
6860 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
6861 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
6862 }
6863 /** @todo figure this one out. We need some way of making sure the register isn't
6864 * modified after this point, just in case we start writing crappy MC code. */
6865 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
6866 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6867 return idxReg;
6868 }
6869 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6870
6871 /*
6872 * Because this is supposed to be the commit stage, we just tag along with the
6873 * temporary register allocator and upgrade its allocation to a variable register.
6874 */
6875 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
6876 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
6877 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
6878 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
6879 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
6880 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6881
6882 /*
6883 * Now we need to load the register value.
6884 */
6885 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
6886 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
6887 else
6888 {
6889 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6890 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6891 switch (pReNative->Core.aVars[idxVar].cbVar)
6892 {
6893 case sizeof(uint64_t):
6894 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
6895 break;
6896 case sizeof(uint32_t):
6897 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
6898 break;
6899 case sizeof(uint16_t):
6900 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
6901 break;
6902 case sizeof(uint8_t):
6903 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
6904 break;
6905 default:
6906 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
6907 }
6908 }
6909
6910 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6911 return idxReg;
6912}
6913
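/*
 * A minimal usage sketch for iemNativeVarRegisterAcquireForGuestReg (illustrative
 * only, assuming a stack variable idxValueVar that is about to be committed to the
 * guest GPR iGReg; the store helper and context offset mirror the ones used by the
 * IEM_MC_STORE_GREG_XXX emitters further down):
 *
 * @code
 *    uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                                                     IEMNATIVEGSTREG_GPR(iGReg), &off);
 *    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg,
 *                                         RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
 *    iemNativeVarRegisterRelease(pReNative, idxValueVar);
 * @endcode
 */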
6914
6915/**
6916 * Sets the host register for @a idxVarRc to @a idxReg.
6917 *
6918 * The register must not be allocated. Any guest register shadowing will be
6919 * implicitly dropped by this call.
6920 *
6921 * The variable must not have any register associated with it (causes
6922 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
6923 * implied.
6924 *
6925 * @returns idxReg
6926 * @param pReNative The recompiler state.
6927 * @param idxVar The variable.
6928 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
6929 * @param off For recording in debug info.
6930 *
6931 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
6932 */
6933DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
6934{
6935 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6936 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
6937 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6938 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
6939 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
6940
6941 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
6942 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6943
6944 iemNativeVarSetKindToStack(pReNative, idxVar);
6945 pReNative->Core.aVars[idxVar].idxReg = idxReg;
6946
6947 return idxReg;
6948}
6949
6950
6951/**
6952 * Convenience wrapper that calls iemNativeVarRegisterSet and marks the register as acquired.
6953 */
6954DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6955 uint8_t idxReg, uint32_t *poff)
6956{
6957 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
6958 pReNative->Core.aVars[idxVar].fRegAcquired = true;
6959 return idxReg;
6960}
6961
6962
6963/**
6964 * Worker that frees the stack slots for variable @a idxVar if any allocated.
6965 *
6966 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
6967 */
6968DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6969{
6970 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6971 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6972 {
6973 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
6974 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
6975 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
6976 Assert(cSlots > 0);
6977 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
6978 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
6979 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
6980 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6981 }
6982 else
6983 Assert(idxStackSlot == UINT8_MAX);
6984}
6985
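/*
 * Worked example of the slot math above (illustrative only, the values are
 * hypothetical): a 16 byte variable occupies two 8 byte stack slots, so the
 * allocation mask covers two bits which are shifted to the variable's first
 * slot before being cleared from bmStack.
 *
 * @code
 *    uint8_t  const cbVar      = 16;
 *    uint8_t  const cSlots     = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);  /* = 2 */
 *    uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);                 /* = 0x3 */
 * @endcode
 */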
6986
6987/**
6988 * Worker that frees a single variable.
6989 *
6990 * ASSUMES that @a idxVar is valid.
6991 */
6992DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6993{
6994 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
6995 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
6996
6997 /* Free the host register first if any assigned. */
6998 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6999 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7000 {
7001 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7002 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7003 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7004 }
7005
7006 /* Free argument mapping. */
7007 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7008 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7009 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7010
7011 /* Free the stack slots. */
7012 iemNativeVarFreeStackSlots(pReNative, idxVar);
7013
7014 /* Free the actual variable. */
7015 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7016 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7017}
7018
7019
7020/**
7021 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7022 */
7023DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7024{
7025 while (bmVars != 0)
7026 {
7027 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7028 bmVars &= ~RT_BIT_32(idxVar);
7029
7030#if 1 /** @todo optimize by simplifying this later... */
7031 iemNativeVarFreeOneWorker(pReNative, idxVar);
7032#else
7033 /* Only need to free the host register, the rest is done as bulk updates below. */
7034 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7035 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7036 {
7037 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
7038 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7039 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7040 }
7041#endif
7042 }
7043#if 0 /** @todo optimize by simplifying this later... */
7044 pReNative->Core.bmVars = 0;
7045 pReNative->Core.bmStack = 0;
7046 pReNative->Core.u64ArgVars = UINT64_MAX;
7047#endif
7048}
7049
7050
7051/**
7052 * This is called by IEM_MC_END() to clean up all variables.
7053 */
7054DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
7055{
7056 uint32_t const bmVars = pReNative->Core.bmVars;
7057 if (bmVars != 0)
7058 iemNativeVarFreeAllSlow(pReNative, bmVars);
7059 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7060 Assert(pReNative->Core.bmStack == 0);
7061}
7062
7063
7064#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
7065
7066/**
7067 * This is called by IEM_MC_FREE_LOCAL.
7068 */
7069DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7070{
7071 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7072 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7073 iemNativeVarFreeOneWorker(pReNative, idxVar);
7074}
7075
7076
7077#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
7078
7079/**
7080 * This is called by IEM_MC_FREE_ARG.
7081 */
7082DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7083{
7084 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7085 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
7086 iemNativeVarFreeOneWorker(pReNative, idxVar);
7087}
7088
7089
7090#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
7091
7092/**
7093 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
7094 */
7095DECL_INLINE_THROW(uint32_t)
7096iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
7097{
7098 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7099 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
7100 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7101 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
7102 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
7103
7104 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
7105 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
7106 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
7107 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7108
7109 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
7110
7111 /*
7112 * Special case for immediates.
7113 */
7114 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
7115 {
7116 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7117 {
7118 case sizeof(uint16_t):
7119 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7120 break;
7121 case sizeof(uint32_t):
7122 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
7123 break;
7124 default: AssertFailed(); break;
7125 }
7126 }
7127 else
7128 {
7129 /*
7130 * The generic solution for now.
7131 */
7132 /** @todo optimize this by having the python script make sure the source
7133 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
7134 * statement. Then we could just transfer the register assignments. */
7135 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
7136 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
7137 switch (pReNative->Core.aVars[idxVarDst].cbVar)
7138 {
7139 case sizeof(uint16_t):
7140 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
7141 break;
7142 case sizeof(uint32_t):
7143 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
7144 break;
7145 default: AssertFailed(); break;
7146 }
7147 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
7148 iemNativeVarRegisterRelease(pReNative, idxVarDst);
7149 }
7150 return off;
7151}
7152
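/*
 * Sketch of the immediate special case in iemNativeVarAssignToSmaller above
 * (illustrative only): narrowing a 32-bit immediate source into a 16-bit
 * destination simply truncates the constant and no code is emitted.
 *
 * @code
 *    uint64_t const uSrcValue = UINT64_C(0x12345678);  /* hypothetical source immediate */
 *    uint16_t const uDstConst = (uint16_t)uSrcValue;   /* 0x5678, handed to iemNativeVarSetKindToConst */
 * @endcode
 */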
7153
7154
7155/*********************************************************************************************************************************
7156* Emitters for IEM_MC_CALL_CIMPL_XXX *
7157*********************************************************************************************************************************/
7158
7159/**
7160 * Emits code to load a reference to the given guest register into @a idxGprDst.
7161 */
7162DECL_INLINE_THROW(uint32_t)
7163iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7164 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7165{
7166 /*
7167 * Get the offset relative to the CPUMCTX structure.
7168 */
7169 uint32_t offCpumCtx;
7170 switch (enmClass)
7171 {
7172 case kIemNativeGstRegRef_Gpr:
7173 Assert(idxRegInClass < 16);
7174 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7175 break;
7176
7177 case kIemNativeGstRegRef_GprHighByte: /* AH, CH, DH, BH */
7178 Assert(idxRegInClass < 4);
7179 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7180 break;
7181
7182 case kIemNativeGstRegRef_EFlags:
7183 Assert(idxRegInClass == 0);
7184 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7185 break;
7186
7187 case kIemNativeGstRegRef_MxCsr:
7188 Assert(idxRegInClass == 0);
7189 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7190 break;
7191
7192 case kIemNativeGstRegRef_FpuReg:
7193 Assert(idxRegInClass < 8);
7194 AssertFailed(); /** @todo what kind of indexing? */
7195 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7196 break;
7197
7198 case kIemNativeGstRegRef_MReg:
7199 Assert(idxRegInClass < 8);
7200 AssertFailed(); /** @todo what kind of indexing? */
7201 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7202 break;
7203
7204 case kIemNativeGstRegRef_XReg:
7205 Assert(idxRegInClass < 16);
7206 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7207 break;
7208
7209 default:
7210 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7211 }
7212
7213 /*
7214 * Load the value into the destination register.
7215 */
7216#ifdef RT_ARCH_AMD64
7217 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7218
7219#elif defined(RT_ARCH_ARM64)
7220 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7221 Assert(offCpumCtx < 4096);
7222 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7223
7224#else
7225# error "Port me!"
7226#endif
7227
7228 return off;
7229}
7230
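/*
 * Example of what iemNativeEmitLeaGprByGstRegRef produces for a GPR class
 * reference (illustrative only, the register choice is hypothetical):
 *
 * @code
 *    uint32_t const offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]);  /* kIemNativeGstRegRef_Gpr, rbx */
 * @endcode
 *
 * On AMD64 this ends up as a lea off the VMCPU pointer covering cpum.GstCtx plus
 * offCpumCtx; on ARM64 it is an add of that immediate to the fixed CPUMCTX register.
 */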
7231
7232/**
7233 * Common code for CIMPL and AIMPL calls.
7234 *
7235 * These are calls that use argument variables and such. They should not be
7236 * confused with internal calls required to implement an MC operation,
7237 * like a TLB load and similar.
7238 *
7239 * Upon return all that is left to do is to load any hidden arguments and
7240 * perform the call. All argument variables are freed.
7241 *
7242 * @returns New code buffer offset; throws VBox status code on error.
7243 * @param pReNative The native recompile state.
7244 * @param off The code buffer offset.
7245 * @param cArgs The total number of arguments (includes hidden
7246 * count).
7247 * @param cHiddenArgs The number of hidden arguments. The hidden
7248 * arguments must not have any variable declared for
7249 * them, whereas all the regular arguments must
7250 * (tstIEMCheckMc ensures this).
7251 */
7252DECL_HIDDEN_THROW(uint32_t)
7253iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7254{
7255#ifdef VBOX_STRICT
7256 /*
7257 * Assert sanity.
7258 */
7259 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7260 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7261 for (unsigned i = 0; i < cHiddenArgs; i++)
7262 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7263 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7264 {
7265 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7266 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7267 }
7268 iemNativeRegAssertSanity(pReNative);
7269#endif
7270
7271 /*
7272 * Before we do anything else, go over variables that are referenced and
7273 * make sure they are not in a register.
7274 */
7275 uint32_t bmVars = pReNative->Core.bmVars;
7276 if (bmVars)
7277 {
7278 do
7279 {
7280 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7281 bmVars &= ~RT_BIT_32(idxVar);
7282
7283 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7284 {
7285 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7286 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7287 {
7288 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7289 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7290 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7291 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7292 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7293
7294 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7295 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7296 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7297 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7298 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7299 }
7300 }
7301 } while (bmVars != 0);
7302#if 0 //def VBOX_STRICT
7303 iemNativeRegAssertSanity(pReNative);
7304#endif
7305 }
7306
7307 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7308
7309 /*
7310 * First, go over the host registers that will be used for arguments and make
7311 * sure they either hold the desired argument or are free.
7312 */
7313 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7314 {
7315 for (uint32_t i = 0; i < cRegArgs; i++)
7316 {
7317 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7318 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7319 {
7320 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7321 {
7322 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7323 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7324 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
7325 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7326 if (uArgNo == i)
7327 { /* perfect */ }
7328 /* The variable allocator logic should make sure this is impossible,
7329 except for when the return register is used as a parameter (ARM,
7330 but not x86). */
7331#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7332 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7333 {
7334# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7335# error "Implement this"
7336# endif
7337 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7338 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7339 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7340 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7341 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7342 }
7343#endif
7344 else
7345 {
7346 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7347
7348 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
7349 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7350 else
7351 {
7352 /* just free it, can be reloaded if used again */
7353 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7354 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7355 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7356 }
7357 }
7358 }
7359 else
7360 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7361 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7362 }
7363 }
7364#if 0 //def VBOX_STRICT
7365 iemNativeRegAssertSanity(pReNative);
7366#endif
7367 }
7368
7369 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7370
7371#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7372 /*
7373 * If there are any stack arguments, make sure they are in their place as well.
7374 *
7375 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7376 * the caller) will be loading it later anyway and it must be free (see the first loop).
7377 */
7378 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7379 {
7380 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7381 {
7382 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7383 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7384 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7385 {
7386 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7387 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
7388 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
7389 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7390 }
7391 else
7392 {
7393 /* Use ARG0 as temp for stuff we need registers for. */
7394 switch (pReNative->Core.aVars[idxVar].enmKind)
7395 {
7396 case kIemNativeVarKind_Stack:
7397 {
7398 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7399 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7400 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7401 iemNativeStackCalcBpDisp(idxStackSlot));
7402 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7403 continue;
7404 }
7405
7406 case kIemNativeVarKind_Immediate:
7407 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
7408 continue;
7409
7410 case kIemNativeVarKind_VarRef:
7411 {
7412 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7413 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7414 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7415 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7416 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7417 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7418 {
7419 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7420 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7421 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7422 }
7423 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7424 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7425 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7426 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7427 continue;
7428 }
7429
7430 case kIemNativeVarKind_GstRegRef:
7431 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7432 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7433 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7434 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7435 continue;
7436
7437 case kIemNativeVarKind_Invalid:
7438 case kIemNativeVarKind_End:
7439 break;
7440 }
7441 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7442 }
7443 }
7444# if 0 //def VBOX_STRICT
7445 iemNativeRegAssertSanity(pReNative);
7446# endif
7447 }
7448#else
7449 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7450#endif
7451
7452 /*
7453 * Make sure the argument variables are loaded into their respective registers.
7454 *
7455 * We can optimize this by ASSUMING that any register allocations are for
7456 * registers that have already been loaded and are ready. The previous step
7457 * saw to that.
7458 */
7459 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
7460 {
7461 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7462 {
7463 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7464 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7465 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
7466 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
7467 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
7468 else
7469 {
7470 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7471 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7472 {
7473 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7474 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
7475 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
7476 | RT_BIT_32(idxArgReg);
7477 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
7478 }
7479 else
7480 {
7481 /* Use ARG0 as temp for stuff we need registers for. */
7482 switch (pReNative->Core.aVars[idxVar].enmKind)
7483 {
7484 case kIemNativeVarKind_Stack:
7485 {
7486 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7487 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7488 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
7489 continue;
7490 }
7491
7492 case kIemNativeVarKind_Immediate:
7493 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
7494 continue;
7495
7496 case kIemNativeVarKind_VarRef:
7497 {
7498 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
7499 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7500 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
7501 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7502 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7503 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7504 {
7505 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7506 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7507 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7508 }
7509 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7510 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7511 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
7512 continue;
7513 }
7514
7515 case kIemNativeVarKind_GstRegRef:
7516 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
7517 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
7518 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
7519 continue;
7520
7521 case kIemNativeVarKind_Invalid:
7522 case kIemNativeVarKind_End:
7523 break;
7524 }
7525 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7526 }
7527 }
7528 }
7529#if 0 //def VBOX_STRICT
7530 iemNativeRegAssertSanity(pReNative);
7531#endif
7532 }
7533#ifdef VBOX_STRICT
7534 else
7535 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7536 {
7537 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
7538 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
7539 }
7540#endif
7541
7542 /*
7543 * Free all argument variables (simplified).
7544 * Their lifetime always expires with the call they are for.
7545 */
7546 /** @todo Make the python script check that arguments aren't used after
7547 * IEM_MC_CALL_XXXX. */
7548 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
7549 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
7550 * an argument value. There is also some FPU stuff. */
7551 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
7552 {
7553 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
7554 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
7555
7556 /* no need to free registers: */
7557 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
7558 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
7559 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
7560 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
7561 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
7562 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
7563
7564 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
7565 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7566 iemNativeVarFreeStackSlots(pReNative, idxVar);
7567 }
7568 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
7569
7570 /*
7571 * Flush volatile registers as we make the call.
7572 */
7573 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
7574
7575 return off;
7576}
7577
7578
7579/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
7580DECL_HIDDEN_THROW(uint32_t)
7581iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
7582 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
7583
7584{
7585 /*
7586 * Do all the call setup and cleanup.
7587 */
7588 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
7589
7590 /*
7591 * Load the two or three hidden arguments.
7592 */
7593#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7594 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7595 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7596 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
7597#else
7598 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7599 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
7600#endif
7601
7602 /*
7603 * Make the call and check the return code.
7604 *
7605 * Shadow PC copies are always flushed here; other stuff depends on flags.
7606 * Segment and general purpose registers are explicitly flushed via the
7607 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
7608 * macros.
7609 */
7610 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
7611#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7612 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7613#endif
7614 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
7615 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
7616 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
7617 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7618
7619 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7620}
7621
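/*
 * Reduced sketch of the guest shadow flush mask logic above (illustrative only;
 * it leaves out the iemNativeCImplFlagsToGuestShadowFlushMask adjustment and the
 * helper name is made up): the PC shadow is always flushed, and EFLAGS is added
 * unless the block was recompiled without flag updates.
 *
 * @code
 *    static uint64_t iemSketchCImplBaseFlushMask(uint64_t fGstShwFlush, uint32_t fMc)
 *    {
 *        fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_Pc);
 *        if (!(fMc & IEM_MC_F_WITHOUT_FLAGS))
 *            fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
 *        return fGstShwFlush;
 *    }
 * @endcode
 */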
7622
7623#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7624 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
7625
7626/** Emits code for IEM_MC_CALL_CIMPL_1. */
7627DECL_INLINE_THROW(uint32_t)
7628iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7629 uintptr_t pfnCImpl, uint8_t idxArg0)
7630{
7631 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7632 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
7633}
7634
7635
7636#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7637 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
7638
7639/** Emits code for IEM_MC_CALL_CIMPL_2. */
7640DECL_INLINE_THROW(uint32_t)
7641iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7642 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
7643{
7644 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7645 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7646 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
7647}
7648
7649
7650#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7651 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7652 (uintptr_t)a_pfnCImpl, a0, a1, a2)
7653
7654/** Emits code for IEM_MC_CALL_CIMPL_3. */
7655DECL_INLINE_THROW(uint32_t)
7656iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7657 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7658{
7659 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7660 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7661 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7662 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
7663}
7664
7665
7666#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
7667 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7668 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
7669
7670/** Emits code for IEM_MC_CALL_CIMPL_4. */
7671DECL_INLINE_THROW(uint32_t)
7672iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7673 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7674{
7675 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7676 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7677 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7678 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7679 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
7680}
7681
7682
7683#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
7684 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
7685 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
7686
7687/** Emits code for IEM_MC_CALL_CIMPL_5. */
7688DECL_INLINE_THROW(uint32_t)
7689iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
7690 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
7691{
7692 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
7693 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
7694 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
7695 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
7696 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
7697 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
7698}
7699
7700
7701/** Recompiler debugging: Flush guest register shadow copies. */
7702#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
7703
7704
7705
7706/*********************************************************************************************************************************
7707* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
7708*********************************************************************************************************************************/
7709
7710/**
7711 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
7712 */
7713DECL_INLINE_THROW(uint32_t)
7714iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7715 uintptr_t pfnAImpl, uint8_t cArgs)
7716{
7717 if (idxVarRc != UINT8_MAX)
7718 {
7719 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
7720 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
7721 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
7722 }
7723
7724 /*
7725 * Do all the call setup and cleanup.
7726 */
7727 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
7728
7729 /*
7730 * Make the call and update the return code variable if we've got one.
7731 */
7732 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7733 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
7734 {
7735 pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
7736 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
7737 }
7738
7739 return off;
7740}
7741
7742
7743
7744#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
7745 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
7746
7747#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
7748 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
7749
7750/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
7751DECL_INLINE_THROW(uint32_t)
7752iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
7753{
7754 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
7755}
7756
7757
7758#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
7759 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
7760
7761#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
7762 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
7763
7764/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
7765DECL_INLINE_THROW(uint32_t)
7766iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
7767{
7768 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7769 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
7770}
7771
7772
7773#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
7774 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
7775
7776#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
7777 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
7778
7779/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
7780DECL_INLINE_THROW(uint32_t)
7781iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7782 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
7783{
7784 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7785 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7786 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
7787}
7788
7789
7790#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
7791 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
7792
7793#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
7794 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
7795
7796/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
7797DECL_INLINE_THROW(uint32_t)
7798iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7799 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7800{
7801 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7802 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7803 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7804 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
7805}
7806
7807
7808#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
7809 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7810
7811#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
7812 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
7813
7814/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
7815DECL_INLINE_THROW(uint32_t)
7816iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
7817 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
7818{
7819 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
7820 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
7821 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
7822 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
7823 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
7824}
7825
7826
7827
7828/*********************************************************************************************************************************
7829* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
7830*********************************************************************************************************************************/
7831
7832#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
7833 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
7834
7835#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7836 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
7837
7838#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7839 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
7840
7841#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7842 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
7843
7844
7845/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
7846 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
7847DECL_INLINE_THROW(uint32_t)
7848iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
7849{
7850 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7851 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7852 Assert(iGRegEx < 20);
7853
7854 /* Same discussion as in iemNativeEmitFetchGregU16 */
7855 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7856 kIemNativeGstRegUse_ReadOnly);
7857
7858 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7859 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7860
7861 /* The value is zero-extended to the full 64-bit host register width. */
7862 if (iGRegEx < 16)
7863 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7864 else
7865 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7866
7867 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7868 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7869 return off;
7870}
7871
7872
7873#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
7874 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
7875
7876#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
7877 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
7878
7879#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
7880 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
7881
7882/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
7883DECL_INLINE_THROW(uint32_t)
7884iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
7885{
7886 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7887 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7888 Assert(iGRegEx < 20);
7889
7890 /* Same discussion as in iemNativeEmitFetchGregU16 */
7891 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
7892 kIemNativeGstRegUse_ReadOnly);
7893
7894 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7895 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7896
7897 if (iGRegEx < 16)
7898 {
7899 switch (cbSignExtended)
7900 {
7901 case sizeof(uint16_t):
7902 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7903 break;
7904 case sizeof(uint32_t):
7905 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7906 break;
7907 case sizeof(uint64_t):
7908 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
7909 break;
7910 default: AssertFailed(); break;
7911 }
7912 }
7913 else
7914 {
7915 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
7916 switch (cbSignExtended)
7917 {
7918 case sizeof(uint16_t):
7919 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7920 break;
7921 case sizeof(uint32_t):
7922 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7923 break;
7924 case sizeof(uint64_t):
7925 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
7926 break;
7927 default: AssertFailed(); break;
7928 }
7929 }
7930
7931 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7932 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7933 return off;
7934}
7935
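/*
 * Numeric illustration of the sign extension performed above (the value is
 * hypothetical): fetching AH = 0x80 via IEM_MC_FETCH_GREG_U8_SX_U32_THREADED
 * yields 0xffffff80 in the destination variable.
 *
 * @code
 *    uint8_t  const u8Fetched = 0x80;                                  /* hypothetical AH value */
 *    uint32_t const u32Result = (uint32_t)(int32_t)(int8_t)u8Fetched;  /* = 0xffffff80 */
 * @endcode
 */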
7936
7937
7938#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
7939 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
7940
7941#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
7942 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7943
7944#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
7945 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7946
7947/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
7948DECL_INLINE_THROW(uint32_t)
7949iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
7950{
7951 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7952 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
7953 Assert(iGReg < 16);
7954
7955 /*
7956 * We can either just load the low 16-bit of the GPR into a host register
7957 * for the variable, or we can do so via a shadow copy host register. The
7958 * latter will avoid having to reload it if it's being stored later, but
7959 * will waste a host register if it isn't touched again. Since we don't
7960 * know what's going to happen, we choose the latter for now.
7961 */
7962 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7963 kIemNativeGstRegUse_ReadOnly);
7964
7965 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7966 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7967 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
7968 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7969
7970 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
7971 return off;
7972}
7973
7974
7975#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
7976 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
7977
7978#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
7979 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
7980
7981/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
7982DECL_INLINE_THROW(uint32_t)
7983iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
7984{
7985 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7986 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
7987 Assert(iGReg < 16);
7988
7989 /*
7990 * We can either just load the low 16-bit of the GPR into a host register
7991 * for the variable, or we can do so via a shadow copy host register. The
7992 * latter will avoid having to reload it if it's being stored later, but
7993 * will waste a host register if it isn't touched again. Since we don't
7994 * know what's going to happen, we choose the latter for now.
7995 */
7996 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7997 kIemNativeGstRegUse_ReadOnly);
7998
7999 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8000 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8001 if (cbSignExtended == sizeof(uint32_t))
8002 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8003 else
8004 {
8005 Assert(cbSignExtended == sizeof(uint64_t));
8006 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
8007 }
8008 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8009
8010 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8011 return off;
8012}
8013
8014
8015#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
8016 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
8017
8018#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
8019 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
8020
8021/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
8022DECL_INLINE_THROW(uint32_t)
8023iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8024{
8025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8026 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
8027 Assert(iGReg < 16);
8028
8029 /*
8030 * We can either just load the low 32-bit of the GPR into a host register
8031 * for the variable, or we can do so via a shadow copy host register. The
8032 * latter will avoid having to reload it if it's being stored later, but
8033 * will waste a host register if it isn't touched again. Since we don't
8034 * know what's going to happen, we choose the latter for now.
8035 */
8036 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8037 kIemNativeGstRegUse_ReadOnly);
8038
8039 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8040 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8041 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8042 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8043
8044 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8045 return off;
8046}
8047
8048
8049#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
8050 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
8051
8052/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
8053DECL_INLINE_THROW(uint32_t)
8054iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8055{
8056 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8057 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8058 Assert(iGReg < 16);
8059
8060 /*
8061 * We can either just load the low 32-bit of the GPR into a host register
8062 * for the variable, or we can do so via a shadow copy host register. The
8063 * latter will avoid having to reload it if it's being stored later, but
8064 * will waste a host register if it isn't touched again. Since we don't
8065 * know what's going to happen, we choose the latter for now.
8066 */
8067 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8068 kIemNativeGstRegUse_ReadOnly);
8069
8070 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8071 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8072 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
8073 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8074
8075 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8076 return off;
8077}
8078
8079
8080#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
8081 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8082
8083#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
8084 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
8085
8086/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
8087 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
8088DECL_INLINE_THROW(uint32_t)
8089iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
8090{
8091 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8092 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
8093 Assert(iGReg < 16);
8094
8095 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8096 kIemNativeGstRegUse_ReadOnly);
8097
8098 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8099 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8100 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
8101 /** @todo name the register a shadow one already? */
8102 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8103
8104 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8105 return off;
8106}
8107
8108
8109
8110/*********************************************************************************************************************************
8111* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
8112*********************************************************************************************************************************/
8113
8114#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
8115 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
8116
8117/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
8118DECL_INLINE_THROW(uint32_t)
8119iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
8120{
8121 Assert(iGRegEx < 20);
8122 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8123 kIemNativeGstRegUse_ForUpdate);
8124#ifdef RT_ARCH_AMD64
8125 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8126
8127 /* To the lowest byte of the register: mov r8, imm8 */
8128 if (iGRegEx < 16)
8129 {
8130 if (idxGstTmpReg >= 8)
8131 pbCodeBuf[off++] = X86_OP_REX_B;
8132 else if (idxGstTmpReg >= 4)
8133 pbCodeBuf[off++] = X86_OP_REX;
8134 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8135 pbCodeBuf[off++] = u8Value;
8136 }
8137 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
8138 else if (idxGstTmpReg < 4)
8139 {
8140 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
8141 pbCodeBuf[off++] = u8Value;
8142 }
8143 else
8144 {
8145 /* ror reg64, 8 */
8146 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8147 pbCodeBuf[off++] = 0xc1;
8148 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8149 pbCodeBuf[off++] = 8;
8150
8151 /* mov reg8, imm8 */
8152 if (idxGstTmpReg >= 8)
8153 pbCodeBuf[off++] = X86_OP_REX_B;
8154 else if (idxGstTmpReg >= 4)
8155 pbCodeBuf[off++] = X86_OP_REX;
8156 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
8157 pbCodeBuf[off++] = u8Value;
8158
8159 /* rol reg64, 8 */
8160 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8161 pbCodeBuf[off++] = 0xc1;
8162 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8163 pbCodeBuf[off++] = 8;
8164 }
8165
8166#elif defined(RT_ARCH_ARM64)
8167 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
8168 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8169 if (iGRegEx < 16)
8170 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
8171 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
8172 else
8173 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
8174 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
8175 iemNativeRegFreeTmp(pReNative, idxImmReg);
8176
8177#else
8178# error "Port me!"
8179#endif
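    /* Both the AMD64 and ARM64 paths above amount to a byte-field insert into
       the 64-bit host register shadowing the guest GPR; a rough C equivalent
       (illustrative only, with uGstReg standing in for that host register):
           if (iGRegEx < 16)
               uGstReg = (uGstReg & ~UINT64_C(0x00ff)) |  (uint64_t)u8Value;        // al, cl, ..., r15b
           else
               uGstReg = (uGstReg & ~UINT64_C(0xff00)) | ((uint64_t)u8Value << 8);  // ah, ch, dh, bh
     */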
8180
8181 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8182
8183 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8184
8185 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8186 return off;
8187}
8188
8189
8190#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
8191 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
8192
8193/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
8194DECL_INLINE_THROW(uint32_t)
8195iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
8196{
8197 Assert(iGRegEx < 20);
8198 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8199
8200 /*
8201	 * If it's a constant value (unlikely), we treat this as an
8202 * IEM_MC_STORE_GREG_U8_CONST statement.
8203 */
8204 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8205 { /* likely */ }
8206 else
8207 {
8208 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8209 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8210 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8211 }
8212
8213 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8214 kIemNativeGstRegUse_ForUpdate);
8215 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8216
8217#ifdef RT_ARCH_AMD64
8218 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
8219 if (iGRegEx < 16)
8220 {
8221 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8222 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8223 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8224 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
8225 pbCodeBuf[off++] = X86_OP_REX;
8226 pbCodeBuf[off++] = 0x8a;
8227 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8228 }
8229 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
8230 else if (idxGstTmpReg < 4 && idxVarReg < 4)
8231 {
8232 /** @todo test this. */
8233 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
8234 pbCodeBuf[off++] = 0x8a;
8235 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
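        /* E.g. (host register assignment purely hypothetical): idxGstTmpReg=0 and
           idxVarReg=1 gives the bytes 8A E1, i.e. "mov ah, cl". */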
8236 }
8237 else
8238 {
8239 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
8240
8241 /* ror reg64, 8 */
8242 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8243 pbCodeBuf[off++] = 0xc1;
8244 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8245 pbCodeBuf[off++] = 8;
8246
8247 /* mov reg8, reg8(r/m) */
8248 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
8249 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
8250 else if (idxGstTmpReg >= 4)
8251 pbCodeBuf[off++] = X86_OP_REX;
8252 pbCodeBuf[off++] = 0x8a;
8253 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
8254
8255 /* rol reg64, 8 */
8256 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
8257 pbCodeBuf[off++] = 0xc1;
8258 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8259 pbCodeBuf[off++] = 8;
8260 }
8261
8262#elif defined(RT_ARCH_ARM64)
8263 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
8264 or
8265 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
8266 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8267 if (iGRegEx < 16)
8268 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
8269 else
8270 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
8271
8272#else
8273# error "Port me!"
8274#endif
8275 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8276
8277 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8278
8279 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
8280 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8281 return off;
8282}
8283
8284
8285
8286#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
8287 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
8288
8289/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
8290DECL_INLINE_THROW(uint32_t)
8291iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
8292{
8293 Assert(iGReg < 16);
8294 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8295 kIemNativeGstRegUse_ForUpdate);
8296#ifdef RT_ARCH_AMD64
8297 /* mov reg16, imm16 */
8298 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8299 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8300 if (idxGstTmpReg >= 8)
8301 pbCodeBuf[off++] = X86_OP_REX_B;
8302 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
8303 pbCodeBuf[off++] = RT_BYTE1(uValue);
8304 pbCodeBuf[off++] = RT_BYTE2(uValue);
8305
8306#elif defined(RT_ARCH_ARM64)
8307 /* movk xdst, #uValue, lsl #0 */
8308 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8309 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
8310
8311#else
8312# error "Port me!"
8313#endif
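    /* Net effect of either path, as a C sketch (uGstReg being an illustrative
       stand-in for the host register shadowing the guest GPR):
           uGstReg = (uGstReg & ~UINT64_C(0xffff)) | uValue;
     */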
8314
8315 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8316
8317 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8318 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8319 return off;
8320}
8321
8322
8323#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
8324 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
8325
8326/** Emits code for IEM_MC_STORE_GREG_U16. */
8327DECL_INLINE_THROW(uint32_t)
8328iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8329{
8330 Assert(iGReg < 16);
8331 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8332
8333 /*
8334	 * If it's a constant value (unlikely), we treat this as an
8335 * IEM_MC_STORE_GREG_U16_CONST statement.
8336 */
8337 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8338 { /* likely */ }
8339 else
8340 {
8341 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8342 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8343 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8344 }
8345
8346 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8347 kIemNativeGstRegUse_ForUpdate);
8348
8349#ifdef RT_ARCH_AMD64
8350 /* mov reg16, reg16 or [mem16] */
8351 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
8352 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8353 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8354 {
8355 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
8356 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
8357 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
8358 pbCodeBuf[off++] = 0x8b;
8359 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
8360 }
8361 else
8362 {
8363 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
8364 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8365 if (idxGstTmpReg >= 8)
8366 pbCodeBuf[off++] = X86_OP_REX_R;
8367 pbCodeBuf[off++] = 0x8b;
8368 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8369 }
8370
8371#elif defined(RT_ARCH_ARM64)
8372 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
8373 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
8374 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8375 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
8376 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8377
8378#else
8379# error "Port me!"
8380#endif
8381
8382 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8383
8384 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8385 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8386 return off;
8387}
8388
8389
8390#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
8391 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
8392
8393/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
8394DECL_INLINE_THROW(uint32_t)
8395iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
8396{
8397 Assert(iGReg < 16);
8398 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8399 kIemNativeGstRegUse_ForFullWrite);
8400 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8402 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8403 return off;
8404}
8405
8406
8407#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
8408 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
8409
8410/** Emits code for IEM_MC_STORE_GREG_U32. */
8411DECL_INLINE_THROW(uint32_t)
8412iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8413{
8414 Assert(iGReg < 16);
8415 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8416
8417 /*
8418	 * If it's a constant value (unlikely), we treat this as an
8419 * IEM_MC_STORE_GREG_U32_CONST statement.
8420 */
8421 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8422 { /* likely */ }
8423 else
8424 {
8425 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8426 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8427 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
8428 }
8429
8430 /*
8431	 * For the rest we allocate a guest register for the variable and write
8432 * it to the CPUMCTX structure.
8433 */
8434 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8435 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8436#ifdef VBOX_STRICT
8437 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
8438#endif
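    /* Note: the store above is a full 64-bit write, so it relies on the variable's
       host register already being zero extended, i.e. the guest GPR ends up as
       (uint64_t)(uint32_t)u32Value; the strict check merely verifies that instead
       of re-clearing the upper half here. */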
8439 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8440 return off;
8441}
8442
8443
8444#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
8445 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
8446
8447/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
8448DECL_INLINE_THROW(uint32_t)
8449iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
8450{
8451 Assert(iGReg < 16);
8452 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8453 kIemNativeGstRegUse_ForFullWrite);
8454 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
8455 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8456 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8457 return off;
8458}
8459
8460
8461#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
8462 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
8463
8464/** Emits code for IEM_MC_STORE_GREG_U64. */
8465DECL_INLINE_THROW(uint32_t)
8466iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
8467{
8468 Assert(iGReg < 16);
8469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
8470
8471 /*
8472	 * If it's a constant value (unlikely), we treat this as an
8473 * IEM_MC_STORE_GREG_U64_CONST statement.
8474 */
8475 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
8476 { /* likely */ }
8477 else
8478 {
8479 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
8480 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8481 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
8482 }
8483
8484 /*
8485	 * For the rest we allocate a guest register for the variable and write
8486 * it to the CPUMCTX structure.
8487 */
8488 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
8489 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8490 iemNativeVarRegisterRelease(pReNative, idxValueVar);
8491 return off;
8492}
8493
8494
8495#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
8496 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
8497
8498/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
8499DECL_INLINE_THROW(uint32_t)
8500iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
8501{
8502 Assert(iGReg < 16);
8503 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8504 kIemNativeGstRegUse_ForUpdate);
8505 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
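    /* (A 32-bit register-to-itself move zero-extends the value on both hosts, so
        this simply clears bits 63:32 of the guest GPR.) */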
8506 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8507 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8508 return off;
8509}
8510
8511
8512/*********************************************************************************************************************************
8513* General purpose register manipulation (add, sub). *
8514*********************************************************************************************************************************/
8515
8516#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8517 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8518
8519/** Emits code for IEM_MC_ADD_GREG_U16. */
8520DECL_INLINE_THROW(uint32_t)
8521iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
8522{
8523 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8524 kIemNativeGstRegUse_ForUpdate);
8525
8526#ifdef RT_ARCH_AMD64
8527 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8528 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8529 if (idxGstTmpReg >= 8)
8530 pbCodeBuf[off++] = X86_OP_REX_B;
8531 if (uAddend == 1)
8532 {
8533 pbCodeBuf[off++] = 0xff; /* inc */
8534 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8535 }
8536 else
8537 {
8538 pbCodeBuf[off++] = 0x81;
8539 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8540 pbCodeBuf[off++] = uAddend;
8541 pbCodeBuf[off++] = 0;
8542 }
8543
8544#else
8545 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8546 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8547
8548	    /* add tmp, gstgrp, uAddend */
8549 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
8550
8551	    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
8552 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
8553
8554 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8555#endif
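    /* Either way only the low 16 bits of the guest register change; as a C sketch
       (uGstReg being an illustrative stand-in for the shadowing host register):
           uGstReg = (uGstReg & ~UINT64_C(0xffff)) | (uint16_t)(uGstReg + uAddend);
     */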
8556
8557 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8558
8559 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8560
8561 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8562 return off;
8563}
8564
8565
8566#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
8567 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8568
8569#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
8570 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8571
8572/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
8573DECL_INLINE_THROW(uint32_t)
8574iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
8575{
8576 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8577 kIemNativeGstRegUse_ForUpdate);
8578
8579#ifdef RT_ARCH_AMD64
8580 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8581 if (f64Bit)
8582 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8583 else if (idxGstTmpReg >= 8)
8584 pbCodeBuf[off++] = X86_OP_REX_B;
8585 if (uAddend == 1)
8586 {
8587 pbCodeBuf[off++] = 0xff; /* inc */
8588 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8589 }
8590 else if (uAddend < 128)
8591 {
8592 pbCodeBuf[off++] = 0x83; /* add */
8593 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8594 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8595 }
8596 else
8597 {
8598 pbCodeBuf[off++] = 0x81; /* add */
8599 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
8600 pbCodeBuf[off++] = RT_BYTE1(uAddend);
8601 pbCodeBuf[off++] = 0;
8602 pbCodeBuf[off++] = 0;
8603 pbCodeBuf[off++] = 0;
8604 }
8605
8606#else
8607	    /* add gstgrp, gstgrp, uAddend */
8608 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8609 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
8610
8611#endif
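    /* C sketch of the result (uGstReg again an illustrative stand-in for the
       shadowing host register):
           uGstReg = f64Bit ? uGstReg + uAddend : (uint64_t)(uint32_t)(uGstReg + uAddend);
       i.e. the 32-bit form clears the upper half, as 32-bit register writes do on
       both AMD64 and ARM64. */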
8612
8613 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8614
8615 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8616
8617 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8618 return off;
8619}
8620
8621
8622
8623#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
8624 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
8625
8626/** Emits code for IEM_MC_SUB_GREG_U16. */
8627DECL_INLINE_THROW(uint32_t)
8628iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
8629{
8630 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8631 kIemNativeGstRegUse_ForUpdate);
8632
8633#ifdef RT_ARCH_AMD64
8634 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8635 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8636 if (idxGstTmpReg >= 8)
8637 pbCodeBuf[off++] = X86_OP_REX_B;
8638 if (uSubtrahend == 1)
8639 {
8640 pbCodeBuf[off++] = 0xff; /* dec */
8641 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8642 }
8643 else
8644 {
8645 pbCodeBuf[off++] = 0x81;
8646 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8647 pbCodeBuf[off++] = uSubtrahend;
8648 pbCodeBuf[off++] = 0;
8649 }
8650
8651#else
8652 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8653 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8654
8655 /* sub tmp, gstgrp, uSubtrahend */
8656 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
8657
8658 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
8659	    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
8660
8661 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8662#endif
8663
8664 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8665
8666 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8667
8668 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8669 return off;
8670}
8671
8672
8673#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
8674 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
8675
8676#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
8677 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
8678
8679/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
8680DECL_INLINE_THROW(uint32_t)
8681iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
8682{
8683 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
8684 kIemNativeGstRegUse_ForUpdate);
8685
8686#ifdef RT_ARCH_AMD64
8687 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8688 if (f64Bit)
8689 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
8690 else if (idxGstTmpReg >= 8)
8691 pbCodeBuf[off++] = X86_OP_REX_B;
8692 if (uSubtrahend == 1)
8693 {
8694 pbCodeBuf[off++] = 0xff; /* dec */
8695 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
8696 }
8697 else if (uSubtrahend < 128)
8698 {
8699 pbCodeBuf[off++] = 0x83; /* sub */
8700 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8701 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8702 }
8703 else
8704 {
8705 pbCodeBuf[off++] = 0x81; /* sub */
8706 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
8707 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
8708 pbCodeBuf[off++] = 0;
8709 pbCodeBuf[off++] = 0;
8710 pbCodeBuf[off++] = 0;
8711 }
8712
8713#else
8714	    /* sub gstgrp, gstgrp, uSubtrahend */
8715 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8716 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
8717
8718#endif
8719
8720 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8721
8722 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
8723
8724 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
8725 return off;
8726}
8727
8728
8729
8730/*********************************************************************************************************************************
8731* EFLAGS *
8732*********************************************************************************************************************************/
8733
8734#define IEM_MC_FETCH_EFLAGS(a_EFlags) \
8735 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
8736
8737/** Handles IEM_MC_FETCH_EFLAGS. */
8738DECL_INLINE_THROW(uint32_t)
8739iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8740{
8741 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8742 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8743
8744 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
8745 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8746 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8747 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8748 return off;
8749}
8750
8751
8752#define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
8753 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
8754
8755/** Handles IEM_MC_COMMIT_EFLAGS. */
8756DECL_INLINE_THROW(uint32_t)
8757iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
8758{
8759 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
8760 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
8761
8762 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
8763
8764#ifdef VBOX_STRICT
8765 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
8766 off = iemNativeEmitJnzToFixed(pReNative, off, 1);
8767 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
8768
8769 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
8770 off = iemNativeEmitJzToFixed(pReNative, off, 1);
8771 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
8772#endif
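    /* (The strict checks above verify basic EFLAGS invariants on the incoming
        value: break with 0x2001 if the always-one bit (X86_EFL_RA1_MASK) is clear,
        and with 0x2002 if any reserved-as-zero bit is set.) */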
8773
8774 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
8775 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
8776 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
8777 return off;
8778}
8779
8780
8781
8782/*********************************************************************************************************************************
8783* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
8784*********************************************************************************************************************************/
8785
8786#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
8787 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
8788
8789#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
8790 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
8791
8792#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
8793 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
8794
8795
8796/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
8797 * IEM_MC_FETCH_SREG_ZX_U64. */
8798DECL_INLINE_THROW(uint32_t)
8799iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
8800{
8801 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8802 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
8803 Assert(iSReg < X86_SREG_COUNT);
8804
8805 /*
8806	 * For now, we will not create a shadow copy of a selector. The rationale
8807 * is that since we do not recompile the popping and loading of segment
8808	 * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for
8809 * pushing and moving to registers, there is only a small chance that the
8810 * shadow copy will be accessed again before the register is reloaded. One
8811	 * scenario would be nested calls in 16-bit code, but I doubt it's worth
8812 * the extra register pressure atm.
8813 *
8814 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
8815	 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
8816	 * store scenario covered at present (r160730).
8817 */
8818 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8819 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8820 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
8821 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8822 return off;
8823}
8824
8825
8826
8827/*********************************************************************************************************************************
8828* Register references. *
8829*********************************************************************************************************************************/
8830
8831#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
8832 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
8833
8834#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
8835 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
8836
8837/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
8838DECL_INLINE_THROW(uint32_t)
8839iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
8840{
8841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8842 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8843 Assert(iGRegEx < 20);
8844
8845 if (iGRegEx < 16)
8846 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8847 else
8848 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
8849
8850 /* If we've delayed writing back the register value, flush it now. */
8851 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
8852
8853 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8854 if (!fConst)
8855 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
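    /* (Rationale: the reference hands the caller a pointer straight into CPUMCTX,
        so the structure must hold the current value; and for a writable reference
        no host register shadow may survive to hide an update made through it.) */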
8856
8857 return off;
8858}
8859
8860#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
8861 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
8862
8863#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
8864 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
8865
8866#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
8867 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
8868
8869#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
8870 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
8871
8872#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
8873 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
8874
8875#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
8876 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
8877
8878#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
8879 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
8880
8881#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
8882 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
8883
8884#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
8885 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
8886
8887#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
8888 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
8889
8890/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
8891DECL_INLINE_THROW(uint32_t)
8892iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
8893{
8894 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8895 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8896 Assert(iGReg < 16);
8897
8898 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
8899
8900 /* If we've delayed writing back the register value, flush it now. */
8901 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
8902
8903 /* If it's not a const reference we need to flush the shadow copy of the register now. */
8904 if (!fConst)
8905 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
8906
8907 return off;
8908}
8909
8910
8911#define IEM_MC_REF_EFLAGS(a_pEFlags) \
8912 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
8913
8914/** Handles IEM_MC_REF_EFLAGS. */
8915DECL_INLINE_THROW(uint32_t)
8916iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
8917{
8918 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
8919 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
8920
8921 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
8922
8923 /* If we've delayed writing back the register value, flush it now. */
8924 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
8925
8926 /* If there is a shadow copy of guest EFLAGS, flush it now. */
8927 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
8928
8929 return off;
8930}
8931
8932
8933/*********************************************************************************************************************************
8934* Effective Address Calculation *
8935*********************************************************************************************************************************/
8936#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
8937 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
8938
8939/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
8940 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
8941DECL_INLINE_THROW(uint32_t)
8942iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8943 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
8944{
8945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
8946
8947 /*
8948 * Handle the disp16 form with no registers first.
8949 *
8950 * Convert to an immediate value, as that'll delay the register allocation
8951 * and assignment till the memory access / call / whatever and we can use
8952 * a more appropriate register (or none at all).
8953 */
8954 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
8955 {
8956 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
8957 return off;
8958 }
8959
8960	    /* Determine the displacement. */
8961 uint16_t u16EffAddr;
8962 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
8963 {
8964 case 0: u16EffAddr = 0; break;
8965 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
8966 case 2: u16EffAddr = u16Disp; break;
8967 default: AssertFailedStmt(u16EffAddr = 0);
8968 }
8969
8970 /* Determine the registers involved. */
8971 uint8_t idxGstRegBase;
8972 uint8_t idxGstRegIndex;
8973 switch (bRm & X86_MODRM_RM_MASK)
8974 {
8975 case 0:
8976 idxGstRegBase = X86_GREG_xBX;
8977 idxGstRegIndex = X86_GREG_xSI;
8978 break;
8979 case 1:
8980 idxGstRegBase = X86_GREG_xBX;
8981 idxGstRegIndex = X86_GREG_xDI;
8982 break;
8983 case 2:
8984 idxGstRegBase = X86_GREG_xBP;
8985 idxGstRegIndex = X86_GREG_xSI;
8986 break;
8987 case 3:
8988 idxGstRegBase = X86_GREG_xBP;
8989 idxGstRegIndex = X86_GREG_xDI;
8990 break;
8991 case 4:
8992 idxGstRegBase = X86_GREG_xSI;
8993 idxGstRegIndex = UINT8_MAX;
8994 break;
8995 case 5:
8996 idxGstRegBase = X86_GREG_xDI;
8997 idxGstRegIndex = UINT8_MAX;
8998 break;
8999 case 6:
9000 idxGstRegBase = X86_GREG_xBP;
9001 idxGstRegIndex = UINT8_MAX;
9002 break;
9003#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
9004 default:
9005#endif
9006 case 7:
9007 idxGstRegBase = X86_GREG_xBX;
9008 idxGstRegIndex = UINT8_MAX;
9009 break;
9010 }
9011
9012 /*
9013 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
9014 */
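    /* Illustrative example: for "mov ax, [bp+si-4]" (bRm=0x42, disp8=0xfc) the
       switch above selects BP as base and SI as index, u16EffAddr becomes 0xfffc,
       and the emitted code yields idxRegRet = (uint16_t)(BP + SI - 4). */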
9015 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9016 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9017 kIemNativeGstRegUse_ReadOnly);
9018 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
9019 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9020 kIemNativeGstRegUse_ReadOnly)
9021 : UINT8_MAX;
9022#ifdef RT_ARCH_AMD64
9023 if (idxRegIndex == UINT8_MAX)
9024 {
9025 if (u16EffAddr == 0)
9026 {
9027	            /* movzx ret, base */
9028 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
9029 }
9030 else
9031 {
9032 /* lea ret32, [base64 + disp32] */
9033 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9034 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9035 if (idxRegRet >= 8 || idxRegBase >= 8)
9036 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9037 pbCodeBuf[off++] = 0x8d;
9038 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9039 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
9040 else
9041 {
9042 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
9043 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9044 }
9045 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9046 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9047 pbCodeBuf[off++] = 0;
9048 pbCodeBuf[off++] = 0;
9049 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9050
9051 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9052 }
9053 }
9054 else
9055 {
9056 /* lea ret32, [index64 + base64 (+ disp32)] */
9057 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9058 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9059 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9060 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9061 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9062 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9063 pbCodeBuf[off++] = 0x8d;
9064 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
9065 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9066 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
9067 if (bMod == X86_MOD_MEM4)
9068 {
9069 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
9070 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
9071 pbCodeBuf[off++] = 0;
9072 pbCodeBuf[off++] = 0;
9073 }
9074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9075 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
9076 }
9077
9078#elif defined(RT_ARCH_ARM64)
9079 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9080 if (u16EffAddr == 0)
9081 {
9082 if (idxRegIndex == UINT8_MAX)
9083 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
9084 else
9085 {
9086 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
9087 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9088 }
9089 }
9090 else
9091 {
9092 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
9093 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
9094 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
9095 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9096 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
9097 else
9098 {
9099 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
9100 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9101 }
9102 if (idxRegIndex != UINT8_MAX)
9103 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
9104 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
9105 }
9106
9107#else
9108# error "port me"
9109#endif
9110
9111 if (idxRegIndex != UINT8_MAX)
9112 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9113 iemNativeRegFreeTmp(pReNative, idxRegBase);
9114 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9115 return off;
9116}
9117
9118
9119#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
9120 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
9121
9122/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
9123 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
9124DECL_INLINE_THROW(uint32_t)
9125iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9126 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
9127{
9128 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9129
9130 /*
9131 * Handle the disp32 form with no registers first.
9132 *
9133 * Convert to an immediate value, as that'll delay the register allocation
9134 * and assignment till the memory access / call / whatever and we can use
9135 * a more appropriate register (or none at all).
9136 */
9137 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9138 {
9139 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
9140 return off;
9141 }
9142
9143	    /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
9144 uint32_t u32EffAddr = 0;
9145 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9146 {
9147 case 0: break;
9148 case 1: u32EffAddr = (int8_t)u32Disp; break;
9149 case 2: u32EffAddr = u32Disp; break;
9150 default: AssertFailed();
9151 }
9152
9153 /* Get the register (or SIB) value. */
9154 uint8_t idxGstRegBase = UINT8_MAX;
9155 uint8_t idxGstRegIndex = UINT8_MAX;
9156 uint8_t cShiftIndex = 0;
9157 switch (bRm & X86_MODRM_RM_MASK)
9158 {
9159 case 0: idxGstRegBase = X86_GREG_xAX; break;
9160 case 1: idxGstRegBase = X86_GREG_xCX; break;
9161 case 2: idxGstRegBase = X86_GREG_xDX; break;
9162 case 3: idxGstRegBase = X86_GREG_xBX; break;
9163 case 4: /* SIB */
9164 {
9165	            /* index w/ scaling. */
9166 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9167 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9168 {
9169 case 0: idxGstRegIndex = X86_GREG_xAX; break;
9170 case 1: idxGstRegIndex = X86_GREG_xCX; break;
9171 case 2: idxGstRegIndex = X86_GREG_xDX; break;
9172 case 3: idxGstRegIndex = X86_GREG_xBX; break;
9173 case 4: cShiftIndex = 0; /*no index*/ break;
9174 case 5: idxGstRegIndex = X86_GREG_xBP; break;
9175 case 6: idxGstRegIndex = X86_GREG_xSI; break;
9176 case 7: idxGstRegIndex = X86_GREG_xDI; break;
9177 }
9178
9179 /* base */
9180 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
9181 {
9182 case 0: idxGstRegBase = X86_GREG_xAX; break;
9183 case 1: idxGstRegBase = X86_GREG_xCX; break;
9184 case 2: idxGstRegBase = X86_GREG_xDX; break;
9185 case 3: idxGstRegBase = X86_GREG_xBX; break;
9186 case 4:
9187 idxGstRegBase = X86_GREG_xSP;
9188 u32EffAddr += uSibAndRspOffset >> 8;
9189 break;
9190 case 5:
9191 if ((bRm & X86_MODRM_MOD_MASK) != 0)
9192 idxGstRegBase = X86_GREG_xBP;
9193 else
9194 {
9195 Assert(u32EffAddr == 0);
9196 u32EffAddr = u32Disp;
9197 }
9198 break;
9199 case 6: idxGstRegBase = X86_GREG_xSI; break;
9200 case 7: idxGstRegBase = X86_GREG_xDI; break;
9201 }
9202 break;
9203 }
9204 case 5: idxGstRegBase = X86_GREG_xBP; break;
9205 case 6: idxGstRegBase = X86_GREG_xSI; break;
9206 case 7: idxGstRegBase = X86_GREG_xDI; break;
9207 }
9208
9209 /*
9210 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9211 * the start of the function.
9212 */
9213 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9214 {
9215 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
9216 return off;
9217 }
9218
9219 /*
9220 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9221 */
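    /* Illustrative example: for "[eax+ecx*4+0x12345678]" (bRm=0x84, SIB=0x88,
       u32Disp=0x12345678) the emitted code yields
       idxRegRet = (uint32_t)(EAX + (ECX << 2) + 0x12345678). */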
9222 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9223 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9224 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9225 kIemNativeGstRegUse_ReadOnly);
9226 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9227 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9228 kIemNativeGstRegUse_ReadOnly);
9229
9230 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9231 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9232 {
9233 idxRegBase = idxRegIndex;
9234 idxRegIndex = UINT8_MAX;
9235 }
9236
9237#ifdef RT_ARCH_AMD64
9238 if (idxRegIndex == UINT8_MAX)
9239 {
9240 if (u32EffAddr == 0)
9241 {
9242 /* mov ret, base */
9243 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9244 }
9245 else
9246 {
9247 /* lea ret32, [base64 + disp32] */
9248 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9249 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9250 if (idxRegRet >= 8 || idxRegBase >= 8)
9251 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
9252 pbCodeBuf[off++] = 0x8d;
9253 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9254 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9255 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9256 else
9257 {
9258 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9259 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9260 }
9261 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9262 if (bMod == X86_MOD_MEM4)
9263 {
9264 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9265 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9266 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9267 }
9268 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9269 }
9270 }
9271 else
9272 {
9273 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9274 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9275 if (idxRegBase == UINT8_MAX)
9276 {
9277 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
9278 if (idxRegRet >= 8 || idxRegIndex >= 8)
9279 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9280 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9281 pbCodeBuf[off++] = 0x8d;
9282 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9283 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9284 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9285 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9286 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9287 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9288 }
9289 else
9290 {
9291 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9292 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9293 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9294 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9295 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
9296 pbCodeBuf[off++] = 0x8d;
9297 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9298 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9299 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9300 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9301 if (bMod != X86_MOD_MEM0)
9302 {
9303 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9304 if (bMod == X86_MOD_MEM4)
9305 {
9306 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9307 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9308 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9309 }
9310 }
9311 }
9312 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9313 }
9314
9315#elif defined(RT_ARCH_ARM64)
9316 if (u32EffAddr == 0)
9317 {
9318 if (idxRegIndex == UINT8_MAX)
9319 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9320 else if (idxRegBase == UINT8_MAX)
9321 {
9322 if (cShiftIndex == 0)
9323 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
9324 else
9325 {
9326 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9327 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
9328 }
9329 }
9330 else
9331 {
9332 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9333 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9334 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9335 }
9336 }
9337 else
9338 {
9339 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
9340 {
9341 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9342 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
9343 }
9344 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
9345 {
9346 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9347 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
9348 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
9349 }
9350 else
9351 {
9352 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
9353 if (idxRegBase != UINT8_MAX)
9354 {
9355 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9356 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
9357 }
9358 }
9359 if (idxRegIndex != UINT8_MAX)
9360 {
9361 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9362 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9363 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
9364 }
9365 }
9366
9367#else
9368# error "port me"
9369#endif
9370
9371 if (idxRegIndex != UINT8_MAX)
9372 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9373 if (idxRegBase != UINT8_MAX)
9374 iemNativeRegFreeTmp(pReNative, idxRegBase);
9375 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9376 return off;
9377}
9378
9379
9380#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9381 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9382 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9383
9384#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9385 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9386 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
9387
9388#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
9389 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
9390 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
9391
9392/**
9393 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
9394 *
9395 * @returns New off.
9396 * @param   pReNative       The native recompile state.
9397 * @param   off             The current code buffer offset.
9398 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
9399 * bit 4 to REX.X. The two bits are part of the
9400 * REG sub-field, which isn't needed in this
9401 * function.
9402 * @param uSibAndRspOffset Two parts:
9403 * - The first 8 bits make up the SIB byte.
9404 * - The next 8 bits are the fixed RSP/ESP offset
9405 * in case of a pop [xSP].
9406 * @param u32Disp The displacement byte/word/dword, if any.
9407 * @param cbInstr The size of the fully decoded instruction. Used
9408 * for RIP relative addressing.
9409 * @param idxVarRet The result variable number.
9410 * @param f64Bit Whether to use a 64-bit or 32-bit address size
9411 * when calculating the address.
9412 *
9413 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
9414 */
9415DECL_INLINE_THROW(uint32_t)
9416iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
9417 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
9418{
9419 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
9420
9421 /*
9422 * Special case the rip + disp32 form first.
9423 */
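    /* RIP relative addressing is relative to the end of the instruction, so the
       effective address works out to <guest RIP of this instruction> + cbInstr
       + (int32_t)u32Disp, which is what the code below computes (truncated to
       32 bits for the !f64Bit case). */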
9424 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
9425 {
9426 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9427 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
9428 kIemNativeGstRegUse_ReadOnly);
9429#ifdef RT_ARCH_AMD64
9430 if (f64Bit)
9431 {
9432 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
9433 if ((int32_t)offFinalDisp == offFinalDisp)
9434 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
9435 else
9436 {
9437 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
9438 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
9439 }
9440 }
9441 else
9442 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
9443
9444#elif defined(RT_ARCH_ARM64)
9445 if (f64Bit)
9446 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9447 (int64_t)(int32_t)u32Disp + cbInstr);
9448 else
9449 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
9450 (int32_t)u32Disp + cbInstr);
9451
9452#else
9453# error "Port me!"
9454#endif
9455 iemNativeRegFreeTmp(pReNative, idxRegPc);
9456 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9457 return off;
9458 }
9459
9460	    /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
9461 int64_t i64EffAddr = 0;
9462 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
9463 {
9464 case 0: break;
9465 case 1: i64EffAddr = (int8_t)u32Disp; break;
9466 case 2: i64EffAddr = (int32_t)u32Disp; break;
9467 default: AssertFailed();
9468 }
9469
9470 /* Get the register (or SIB) value. */
9471 uint8_t idxGstRegBase = UINT8_MAX;
9472 uint8_t idxGstRegIndex = UINT8_MAX;
9473 uint8_t cShiftIndex = 0;
9474 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
9475 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
9476 else /* SIB: */
9477 {
9478	        /* index w/ scaling. */
9479 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
9480 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
9481 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
9482 if (idxGstRegIndex == 4)
9483 {
9484 /* no index */
9485 cShiftIndex = 0;
9486 idxGstRegIndex = UINT8_MAX;
9487 }
9488
9489 /* base */
9490 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
9491 if (idxGstRegBase == 4)
9492 {
9493 /* pop [rsp] hack */
9494 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
9495 }
9496 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
9497 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
9498 {
9499 /* mod=0 and base=5 -> disp32, no base reg. */
9500 Assert(i64EffAddr == 0);
9501 i64EffAddr = (int32_t)u32Disp;
9502 idxGstRegBase = UINT8_MAX;
9503 }
9504 }
9505
9506 /*
9507 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
9508 * the start of the function.
9509 */
9510 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
9511 {
9512 if (f64Bit)
9513 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
9514 else
9515 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
9516 return off;
9517 }
9518
9519 /*
9520 * Now emit code that calculates:
9521 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9522 * or if !f64Bit:
9523 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
9524 */
9525 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
9526 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
9527 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
9528 kIemNativeGstRegUse_ReadOnly);
9529 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
9530 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
9531 kIemNativeGstRegUse_ReadOnly);
9532
9533 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
9534 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
9535 {
9536 idxRegBase = idxRegIndex;
9537 idxRegIndex = UINT8_MAX;
9538 }
9539
9540#ifdef RT_ARCH_AMD64
9541 uint8_t bFinalAdj;
9542 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
9543 bFinalAdj = 0; /* likely */
9544 else
9545 {
9546 /* pop [rsp] with a problematic disp32 value. Split out the
9547 RSP offset and add it separately afterwards (bFinalAdj). */
9548 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
9549 Assert(idxGstRegBase == X86_GREG_xSP);
9550 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
9551 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
9552 Assert(bFinalAdj != 0);
9553 i64EffAddr -= bFinalAdj;
9554 Assert((int32_t)i64EffAddr == i64EffAddr);
9555 }
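    /* Worked example with hypothetical values: disp32 = 0x7ffffffc plus a
       pop-[rsp] adjustment of 8 yields i64EffAddr = 0x80000004, which no
       longer fits the signed 32-bit LEA displacement; the LEA below is
       therefore emitted with 0x7ffffffc and the remaining 8 added separately
       via bFinalAdj. */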
9556 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
9557//pReNative->pInstrBuf[off++] = 0xcc;
9558
9559 if (idxRegIndex == UINT8_MAX)
9560 {
9561 if (u32EffAddr == 0)
9562 {
9563 /* mov ret, base */
9564 if (f64Bit)
9565 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
9566 else
9567 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
9568 }
9569 else
9570 {
9571 /* lea ret, [base + disp32] */
9572 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
9573 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9574 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
9575 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9576 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9577 | (f64Bit ? X86_OP_REX_W : 0);
9578 pbCodeBuf[off++] = 0x8d;
9579 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
9580 if (idxRegBase != X86_GREG_x12 /*SIB*/)
9581 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
9582 else
9583 {
9584 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9585 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
9586 }
9587 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9588 if (bMod == X86_MOD_MEM4)
9589 {
9590 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9591 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9592 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9593 }
9594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9595 }
9596 }
9597 else
9598 {
9599 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
9600 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9601 if (idxRegBase == UINT8_MAX)
9602 {
9603 /* lea ret, [(index64 << cShiftIndex) + disp32] */
9604 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
9605 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9606 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9607 | (f64Bit ? X86_OP_REX_W : 0);
9608 pbCodeBuf[off++] = 0x8d;
9609 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
9610 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
9611 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9612 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9613 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9614 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9615 }
9616 else
9617 {
9618 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
9619 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
9620 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
9621 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
9622 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
9623 | (f64Bit ? X86_OP_REX_W : 0);
9624 pbCodeBuf[off++] = 0x8d;
9625 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
9626 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
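                /* Reason for the xBP exclusion above: mod=0 with SIB.base=5
                   (rBP/r13) is decoded as disp32 without a base register, so
                   those bases always need at least a disp8. */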
9627 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
9628 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
9629 if (bMod != X86_MOD_MEM0)
9630 {
9631 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
9632 if (bMod == X86_MOD_MEM4)
9633 {
9634 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
9635 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
9636 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
9637 }
9638 }
9639 }
9640 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9641 }
9642
9643 if (!bFinalAdj)
9644 { /* likely */ }
9645 else
9646 {
9647 Assert(f64Bit);
9648 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
9649 }
9650
9651#elif defined(RT_ARCH_ARM64)
9652 if (i64EffAddr == 0)
9653 {
9654 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9655 if (idxRegIndex == UINT8_MAX)
9656 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
9657 else if (idxRegBase != UINT8_MAX)
9658 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
9659 f64Bit, false /*fSetFlags*/, cShiftIndex);
9660 else
9661 {
9662 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
9663 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
9664 }
9665 }
9666 else
9667 {
9668 if (f64Bit)
9669 { /* likely */ }
9670 else
9671 i64EffAddr = (int32_t)i64EffAddr;
9672
9673 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
9674 {
9675 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9676 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
9677 }
9678 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
9679 {
9680 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9681 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
9682 }
9683 else
9684 {
9685 if (f64Bit)
9686 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
9687 else
9688 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
9689 if (idxRegBase != UINT8_MAX)
9690 {
9691 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9692 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
9693 }
9694 }
9695 if (idxRegIndex != UINT8_MAX)
9696 {
9697 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9698 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
9699 f64Bit, false /*fSetFlags*/, cShiftIndex);
9700 }
9701 }
9702
9703#else
9704# error "port me"
9705#endif
9706
9707 if (idxRegIndex != UINT8_MAX)
9708 iemNativeRegFreeTmp(pReNative, idxRegIndex);
9709 if (idxRegBase != UINT8_MAX)
9710 iemNativeRegFreeTmp(pReNative, idxRegBase);
9711 iemNativeVarRegisterRelease(pReNative, idxVarRet);
9712 return off;
9713}
9714
9715
9716
9717
9718/*********************************************************************************************************************************
9719* Memory fetches and stores common *
9720*********************************************************************************************************************************/
9721
9722typedef enum IEMNATIVEMITMEMOP
9723{
9724 kIemNativeEmitMemOp_Store = 0,
9725 kIemNativeEmitMemOp_Fetch,
9726 kIemNativeEmitMemOp_Fetch_Zx_U16,
9727 kIemNativeEmitMemOp_Fetch_Zx_U32,
9728 kIemNativeEmitMemOp_Fetch_Zx_U64,
9729 kIemNativeEmitMemOp_Fetch_Sx_U16,
9730 kIemNativeEmitMemOp_Fetch_Sx_U32,
9731 kIemNativeEmitMemOp_Fetch_Sx_U64
9732} IEMNATIVEMITMEMOP;
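/* Note: the Fetch_Zx_* / Fetch_Sx_* variants zero- respectively sign-extend the
   fetched value to the indicated destination width before it is stored in the
   destination variable. */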
9733
9734/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
9735 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
9736 * (with iSegReg = UINT8_MAX). */
9737DECL_INLINE_THROW(uint32_t)
9738iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
9739 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
9740 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
9741{
9742 /*
9743 * Assert sanity.
9744 */
9745 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
9746 Assert( enmOp != kIemNativeEmitMemOp_Store
9747 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
9748 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
9749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9750 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
9751 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
9752 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9753 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9754 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
9755 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9756#ifdef VBOX_STRICT
9757 if (iSegReg == UINT8_MAX)
9758 {
9759 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9760 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9761 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9762 switch (cbMem)
9763 {
9764 case 1:
9765 Assert( pfnFunction
9766 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
9767 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9768 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9769 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9770 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
9771 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
9772 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
9773 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
9774 : UINT64_C(0xc000b000a0009000) ));
9775 break;
9776 case 2:
9777 Assert( pfnFunction
9778 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
9779 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9780 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9781 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
9782 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
9783 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
9784 : UINT64_C(0xc000b000a0009000) ));
9785 break;
9786 case 4:
9787 Assert( pfnFunction
9788 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
9789 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
9790 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
9791 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
9792 : UINT64_C(0xc000b000a0009000) ));
9793 break;
9794 case 8:
9795 Assert( pfnFunction
9796 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
9797 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
9798 : UINT64_C(0xc000b000a0009000) ));
9799 break;
9800 }
9801 }
9802 else
9803 {
9804 Assert(iSegReg < 6);
9805 switch (cbMem)
9806 {
9807 case 1:
9808 Assert( pfnFunction
9809 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
9810 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
9811 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9812 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9813 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
9814 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
9815 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
9816 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
9817 : UINT64_C(0xc000b000a0009000) ));
9818 break;
9819 case 2:
9820 Assert( pfnFunction
9821 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
9822 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
9823 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
9824 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
9825 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
9826 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
9827 : UINT64_C(0xc000b000a0009000) ));
9828 break;
9829 case 4:
9830 Assert( pfnFunction
9831 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
9832 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
9833 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
9834 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
9835 : UINT64_C(0xc000b000a0009000) ));
9836 break;
9837 case 8:
9838 Assert( pfnFunction
9839 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
9840 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
9841 : UINT64_C(0xc000b000a0009000) ));
9842 break;
9843 }
9844 }
9845#endif
9846
9847#ifdef VBOX_STRICT
9848 /*
9849 * Check that the fExec flags we've got make sense.
9850 */
9851 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9852#endif
9853
9854 /*
9855 * To keep things simple we have to commit any pending writes first as we
9856 * may end up making calls.
9857 */
9858 /** @todo we could postpone this till we make the call and reload the
9859 * registers after returning from the call. Not sure if that's sensible or
9860 * not, though. */
9861 off = iemNativeRegFlushPendingWrites(pReNative, off);
9862
9863 /*
9864 * Move/spill/flush stuff out of call-volatile registers.
9865 * This is the easy way out. We could contain this to the tlb-miss branch
9866 * by saving and restoring active stuff here.
9867 */
9868 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9869 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9870
9871 /*
9872 * Define labels and allocate the result register (trying for the return
9873 * register if we can).
9874 */
9875 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9876 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
9877 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9878 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX /* special case value storing below */
9879 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9880 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
9881 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
9882
9883 /*
9884 * First we try to go via the TLB.
9885 */
9886//pReNative->pInstrBuf[off++] = 0xcc;
9887 /** @todo later. */
9888 RT_NOREF(fAlignMask, cbMem);
9889
9890 /*
9891 * Call helper to do the fetching or storing.
9892 * We flush all guest register shadow copies here.
9893 */
9894 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
9895
9896#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9897 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9898#else
9899 RT_NOREF(idxInstr);
9900#endif
9901
9902 uint8_t idxRegArgValue;
9903 if (iSegReg == UINT8_MAX)
9904 idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
9905 else
9906 {
9907 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
9908 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9909 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
9910
9911 idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
9912 }
9913
9914 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
9915 if (enmOp == kIemNativeEmitMemOp_Store)
9916 {
9917 if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
9918 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
9919 else
9920 {
9921 uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
9922 if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
9923 {
9924 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9925 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
9926 }
9927 else
9928 {
9929 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
9930 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9931 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
9932 }
9933 }
9934 }
9935
9936 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
9937 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
9938 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
9939 pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
9940 else
9941 {
9942 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
9943 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
9944 {
9945 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9946 if (!offDisp)
9947 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
9948 else
9949 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
9950 }
9951 else
9952 {
9953 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
9954 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9955 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
9956 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
9957 if (offDisp)
9958 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
9959 }
9960 }
9961
9962 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9963 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9964
9965 /* Done setting up parameters, make the call. */
9966 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9967
9968 /*
9969 * Put the result in the right register if this is a fetch.
9970 */
9971 if (enmOp != kIemNativeEmitMemOp_Store)
9972 {
9973 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
9974 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
9975 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
9976 iemNativeVarRegisterRelease(pReNative, idxVarValue);
9977 }
9978
9979 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9980
9981 return off;
9982}
9983
9984
9985
9986/*********************************************************************************************************************************
9987* Memory fetches (IEM_MEM_FETCH_XXX). *
9988*********************************************************************************************************************************/
9989
9990/* 8-bit segmented: */
9991#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
9992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
9993 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
9994 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
9995
9996#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
9997 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
9998 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
9999 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10000
10001#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10003 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10004 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10005
10006#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10007 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10008 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10009 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
10010
10011#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10012 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10013 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10014 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10015
10016#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10017 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10018 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10019 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10020
10021#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10022 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10023 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10024 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10025
10026/* 16-bit segmented: */
10027#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
10028 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10029 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10030 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10031
10032#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10033 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
10034 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10035 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10036
10037#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10038 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10039 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10040 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10041
10042#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10043 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10044 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10045 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
10046
10047#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10048 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10049 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10050 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10051
10052#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10053 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10054 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10055 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10056
10057
10058/* 32-bit segmented: */
10059#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
10060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10061 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10062 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10063
10064#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
10065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
10066 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10067 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10068
10069#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10070 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10071 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10072 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
10073
10074#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10076 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10077 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10078
10079
10080/* 64-bit segmented: */
10081#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
10082 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
10083 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10084 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
10085
10086
10087
10088/* 8-bit flat: */
10089#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
10090 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
10091 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
10092 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10093
10094#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
10095 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10096 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
10097 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10098
10099#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
10100 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10101 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10102 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10103
10104#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
10105 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10106 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10107 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
10108
10109#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
10110 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10111 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
10112 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
10113
10114#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
10115 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10116 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10117 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
10118
10119#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
10120 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10121 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10122 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
10123
10124
10125/* 16-bit flat: */
10126#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
10127 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10128 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10129 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10130
10131#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
10132 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
10133 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
10134 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
10135
10136#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
10137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10138 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
10139 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10140
10141#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
10142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10143 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10144 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
10145
10146#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
10147 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10148 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
10149 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
10150
10151#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
10152 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10153 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10154 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
10155
10156/* 32-bit flat: */
10157#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
10158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10159 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10160 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10161
10162#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
10163 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
10164 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
10165 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
10166
10167#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
10168 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10169 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
10170 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
10171
10172#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
10173 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10174 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
10175 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
10176
10177/* 64-bit flat: */
10178#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
10179 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
10180 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
10181 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
10182
10183
10184
10185/*********************************************************************************************************************************
10186* Memory stores (IEM_MEM_STORE_XXX). *
10187*********************************************************************************************************************************/
10188
10189#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
10190 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
10191 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10192 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10193
10194#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
10195 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
10196 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10197 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10198
10199#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
10200 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
10201 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10202 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10203
10204#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
10205 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
10206 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10207 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10208
10209
10210#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
10211 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
10212 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
10213 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10214
10215#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
10216 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
10217 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
10218 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10219
10220#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
10221 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
10222 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
10223 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10224
10225#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
10226 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
10227 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
10228 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
10229
10230
10231#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
10232 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10233 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
10234
10235#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
10236 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10237 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
10238
10239#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
10240 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10241 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
10242
10243#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
10244 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10245 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
10246
10247
10248#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
10249 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10250 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
10251
10252#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
10253 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10254 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
10255
10256#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
10257 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10258 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
10259
10260#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
10261 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10262 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
10263
10264/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
10265 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
10266DECL_INLINE_THROW(uint32_t)
10267iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
10268 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
10269{
10270 /*
10271 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
10272 * to do the grunt work.
10273 */
10274 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
10275 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
10276 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
10277 pfnFunction, idxInstr);
10278 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
10279 return off;
10280}
10281
10282
10283
10284/*********************************************************************************************************************************
10285* Stack Accesses. *
10286*********************************************************************************************************************************/
10287/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
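/* For instance, assuming the usual IPRT byte packing (b0 | b1 << 8 | b2 << 16 | b3 << 24),
   IEM_MC_FLAT64_PUSH_U16 below passes RT_MAKE_U32_FROM_U8(16, 64, 0, 0) == 0x4010,
   which is the value the VBOX_STRICT checks in iemNativeEmitStackPush compare
   the pfnFunction selection against. */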
10288#define IEM_MC_PUSH_U16(a_u16Value) \
10289 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
10290 (uintptr_t)iemNativeHlpStackPushU16, pCallEntry->idxInstr)
10291#define IEM_MC_PUSH_U32(a_u32Value) \
10292 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
10293 (uintptr_t)iemNativeHlpStackPushU32, pCallEntry->idxInstr)
10294#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
10295 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
10296 (uintptr_t)iemNativeHlpStackPushU32SReg, pCallEntry->idxInstr)
10297#define IEM_MC_PUSH_U64(a_u64Value) \
10298 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
10299 (uintptr_t)iemNativeHlpStackPushU64, pCallEntry->idxInstr)
10300
10301#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
10302 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
10303 (uintptr_t)iemNativeHlpStackFlat32PushU16, pCallEntry->idxInstr)
10304#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
10305 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
10306 (uintptr_t)iemNativeHlpStackFlat32PushU32, pCallEntry->idxInstr)
10307#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
10308 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
10309 (uintptr_t)iemNativeHlpStackFlat32PushU32SReg, pCallEntry->idxInstr)
10310
10311#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
10312 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
10313 (uintptr_t)iemNativeHlpStackFlat64PushU16, pCallEntry->idxInstr)
10314#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
10315 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
10316 (uintptr_t)iemNativeHlpStackFlat64PushU64, pCallEntry->idxInstr)
10317
10318/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
10319DECL_INLINE_THROW(uint32_t)
10320iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
10321 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
10322{
10323 /*
10324 * Assert sanity.
10325 */
10326 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10327#ifdef VBOX_STRICT
10328 if (RT_BYTE2(cBitsVarAndFlat) != 0)
10329 {
10330 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10331 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10332 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10333 Assert( pfnFunction
10334 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU16
10335 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32
10336 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32SReg
10337 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU16
10338 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU64
10339 : UINT64_C(0xc000b000a0009000) ));
10340 }
10341 else
10342 Assert( pfnFunction
10343 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU16
10344 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU32
10345 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackPushU32SReg
10346 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU64
10347 : UINT64_C(0xc000b000a0009000) ));
10348#endif
10349
10350#ifdef VBOX_STRICT
10351 /*
10352 * Check that the fExec flags we've got make sense.
10353 */
10354 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10355#endif
10356
10357 /*
10358 * To keep things simple we have to commit any pending writes first as we
10359 * may end up making calls.
10360 */
10361 /** @todo we could postpone this till we make the call and reload the
10362 * registers after returning from the call. Not sure if that's sensible or
10363 * not, though. */
10364 off = iemNativeRegFlushPendingWrites(pReNative, off);
10365
10366 /*
10367 * Move/spill/flush stuff out of call-volatile registers, keeping whatever
10368 * register idxVarValue might be occupying.
10369 *
10370 * This is the easy way out. We could contain this to the tlb-miss branch
10371 * by saving and restoring active stuff here.
10372 */
10373 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10374 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarValue));
10375
10376 /* For now, flush any shadow copy of the xSP register. */
10377 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
10378
10379 /*
10380 * Define the TLB miss/done labels (a push has no result register to
10381 * allocate).
10382 */
10383 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10384 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10385 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10386
10387 /*
10388 * First we try to go via the TLB.
10389 */
10390//pReNative->pInstrBuf[off++] = 0xcc;
10391 /** @todo later. */
10392 RT_NOREF(cBitsVarAndFlat);
10393
10394 /*
10395 * Call helper to do the pushing.
10396 */
10397 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10398
10399#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10400 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10401#else
10402 RT_NOREF(idxInstr);
10403#endif
10404
10405 /* IEMNATIVE_CALL_ARG1_GREG = idxVarValue (first) */
10406 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue,
10407 0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
10408
10409 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10410 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10411
10412 /* Done setting up parameters, make the call. */
10413 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10414
10415 /* The value variable is implicitly flushed. */
10416 iemNativeVarFreeLocal(pReNative, idxVarValue);
10417
10418 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10419
10420 return off;
10421}
10422
10423
10424
10425/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
10426#define IEM_MC_POP_GREG_U16(a_iGReg) \
10427 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
10428 (uintptr_t)iemNativeHlpStackPopGRegU16, pCallEntry->idxInstr)
10429#define IEM_MC_POP_GREG_U32(a_iGReg) \
10430 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
10431 (uintptr_t)iemNativeHlpStackPopGRegU32, pCallEntry->idxInstr)
10432#define IEM_MC_POP_GREG_U64(a_iGReg) \
10433 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
10434 (uintptr_t)iemNativeHlpStackPopGRegU64, pCallEntry->idxInstr)
10435
10436#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
10437 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
10438 (uintptr_t)iemNativeHlpStackFlat32PopGRegU16, pCallEntry->idxInstr)
10439#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
10440 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
10441 (uintptr_t)iemNativeHlpStackFlat32PopGRegU32, pCallEntry->idxInstr)
10442
10443#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
10444 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
10445 (uintptr_t)iemNativeHlpStackFlat64PopGRegU16, pCallEntry->idxInstr)
10446#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
10447 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
10448 (uintptr_t)iemNativeHlpStackFlat64PopGRegU64, pCallEntry->idxInstr)
10449
10450/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
10451DECL_INLINE_THROW(uint32_t)
10452iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
10453 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
10454{
10455 /*
10456 * Assert sanity.
10457 */
10458 Assert(idxGReg < 16);
10459#ifdef VBOX_STRICT
10460 if (RT_BYTE2(cBitsVarAndFlat) != 0)
10461 {
10462 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10463 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10464 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10465 Assert( pfnFunction
10466 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU16
10467 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU32
10468 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU16
10469 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU64
10470 : UINT64_C(0xc000b000a0009000) ));
10471 }
10472 else
10473 Assert( pfnFunction
10474 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU16
10475 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU32
10476 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU64
10477 : UINT64_C(0xc000b000a0009000) ));
10478#endif
10479
10480#ifdef VBOX_STRICT
10481 /*
10482 * Check that the fExec flags we've got make sense.
10483 */
10484 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10485#endif
10486
10487 /*
10488 * To keep things simple we have to commit any pending writes first as we
10489 * may end up making calls.
10490 */
10491 /** @todo we could postpone this till we make the call and reload the
10492 * registers after returning from the call. Not sure if that's sensible or
10493 * not, though. */
10494 off = iemNativeRegFlushPendingWrites(pReNative, off);
10495
10496 /*
10497 * Move/spill/flush stuff out of call-volatile registers.
10498 * This is the easy way out. We could contain this to the tlb-miss branch
10499 * by saving and restoring active stuff here.
10500 */
10501 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10502 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10503
10504 /* For now, flush any shadow copy of the guest register that is about
10505 to be popped, as well as the xSP register. */
10506 iemNativeRegFlushGuestShadows(pReNative,
10507 RT_BIT_64(IEMNATIVEGSTREG_GPR(idxGReg)) | RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
10508
10509 /*
10510 * Define the TLB miss/done labels (the helper pops directly into the
10511 * guest register, so there is no result register to allocate).
10512 */
10513 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10514 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10515 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10516
10517 /*
10518 * First we try to go via the TLB.
10519 */
10520//pReNative->pInstrBuf[off++] = 0xcc;
10521 /** @todo later. */
10522 RT_NOREF(cBitsVarAndFlat);
10523
10524 /*
10525 * Call helper to do the popping.
10526 */
10527 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10528
10529#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10530 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10531#else
10532 RT_NOREF(idxInstr);
10533#endif
10534
10535 /* IEMNATIVE_CALL_ARG1_GREG = iGReg */
10536 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxGReg);
10537
10538 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10539 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10540
10541 /* Done setting up parameters, make the call. */
10542 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10543
10544 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10545
10546 return off;
10547}
10548
10549
10550
10551/*********************************************************************************************************************************
10552* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
10553*********************************************************************************************************************************/
10554
10555#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10556 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10557 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10558 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
10559
10560#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10561 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10562 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10563 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
10564
10565#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10566 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
10567 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
10568 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
10569
10570
10571#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10572 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10573 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10574 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
10575
10576#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10577 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10578 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10579 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
10580
10581#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10582 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
10583 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10584 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
10585
10586#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10587 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
10588 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10589 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
10590
10591
10592#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10593 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10594 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10595 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
10596
10597#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10598 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10599 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10600 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
10601
10602#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10603 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
10604 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10605 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
10606
10607#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10608 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
10609 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10610 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
10611
10612
10613#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10614 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10615 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10616 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
10617
10618#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10619 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10620 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10621 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
10622
10623#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10624 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
10625 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10626 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
10627
10628#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10629 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
10630 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10631 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
10632
10633
10634#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10635 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
10636 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10637 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
10638
10639#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10640 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
10641 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
10642 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
10643
10644
10645#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10646 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10647 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10648 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
10649
10650#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10651 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10652 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10653 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
10654
10655#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
10656 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
10657 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10658 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
10659
10660
10661
10662#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10663 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10664 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10665 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
10666
10667#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10668 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10669 IEM_ACCESS_TYPE_WRITE, 0 /*fAlignMask*/, \
10670 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
10671
10672#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
10673 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
10674 IEM_ACCESS_TYPE_READ, 0 /*fAlignMask*/, \
10675 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
10676
10677
10678#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10679 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10680 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10681 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
10682
10683#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10684 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10685 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10686 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
10687
10688#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
10689 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
10690 IEM_ACCESS_TYPE_READ, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10691 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
10692
10693#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
10694 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
10695 IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
10696 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
10697
10698
10699#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10700 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10701 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10702 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
10703
10704#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10705 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10706 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10707 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
10708
10709#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
10710 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
10711 IEM_ACCESS_TYPE_READ, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10712 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
10713
10714#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
10715 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
10716 IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
10717 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
10718
10719
10720#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10721 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10722 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10723 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
10724
10725#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10726 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10727 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10728 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
10729
10730#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
10731 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
10732 IEM_ACCESS_TYPE_READ, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10733 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
10734
10735#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
10736 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
10737 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10738 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
10739
10740
10741#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
10742 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
10743 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
10744 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
10745
10746#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
10747 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
10748 IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
10749 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
10750
10751
10752#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10753 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10754 IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10755 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
10756
10757#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10758 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10759 IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10760 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
10761
10762#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
10763 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
10764 IEM_ACCESS_TYPE_READ, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
10765 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
10766
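/*
 * Rough usage sketch for these map/commit wrappers -- pu16Dst, bUnmapInfo,
 * iEffSeg and GCPtrEffDst are illustrative names, not taken from this file:
 *
 *      IEM_MC_MEM_MAP_U16_WO(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *      ... store through pu16Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_WO(bUnmapInfo);
 *
 * The recompiler thus emits one map call producing the host pointer and the
 * unmap-info byte, and later the matching commit-and-unmap (see below).
 */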
10767
10768DECL_INLINE_THROW(uint32_t)
10769iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
10770 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
10771 uintptr_t pfnFunction, uint8_t idxInstr)
10772{
10773 /*
10774 * Assert sanity.
10775 */
10776 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
10777 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
10778 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
10779 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10780
10781 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
10782 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
10783 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
10784 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10785
10786 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10787 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10788 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10789 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10790
10791 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10792
10793 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10794
10795#ifdef VBOX_STRICT
10796# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
10797 ( ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
10798 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
10799 : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
10800 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
10801
10802 if (iSegReg == UINT8_MAX)
10803 {
10804 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10805 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10806 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10807 switch (cbMem)
10808 {
10809 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
10810 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
10811 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
10812 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
10813 case 10:
10814 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
10815 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
10816 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
10817 break;
10818 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
10819# if 0
10820 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
10821 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
10822# endif
10823 default: AssertFailed(); break;
10824 }
10825 }
10826 else
10827 {
10828 Assert(iSegReg < 6);
10829 switch (cbMem)
10830 {
10831 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
10832 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
10833 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
10834 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
10835 case 10:
10836 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
10837 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
10838 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
10839 break;
10840 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
10841# if 0
10842 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
10843 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
10844# endif
10845 default: AssertFailed(); break;
10846 }
10847 }
10848# undef IEM_MAP_HLP_FN
10849#endif
10850
10851#ifdef VBOX_STRICT
10852 /*
10853 * Check that the fExec flags we've got make sense.
10854 */
10855 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
10856#endif
10857
10858 /*
10859 * To keep things simple we have to commit any pending writes first as we
10860 * may end up making calls.
10861 */
10862 /** @todo we could postpone this till we make the call and reload the
10863 * registers after returning from the call. Not sure if that's sensible or
10864 * not, though. */
10865 off = iemNativeRegFlushPendingWrites(pReNative, off);
10866
10867 /*
10868 * Move/spill/flush stuff out of call-volatile registers.
10869 * This is the easy way out. We could contain this to the tlb-miss branch
10870 * by saving and restoring active stuff here.
10871 */
10872 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
10873 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
10874
10875 /*
10876 * Define labels and allocate the result register (trying for the return
10877 * register if we can - which we of course can, given the above call).
10878 */
10879 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
10880 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
10881 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
10882 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
10883 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
10884 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
10885
10886 /*
10887 * First we try to go via the TLB.
10888 */
10889//pReNative->pInstrBuf[off++] = 0xcc;
10890 /** @todo later. */
10891 RT_NOREF(fAccess, fAlignMask, cbMem);
10892
10893 /*
10894 * Call helper to do the fetching.
10895 * We flush all guest register shadow copies here.
10896 */
10897 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
10898
10899#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10900 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10901#else
10902 RT_NOREF(idxInstr);
10903#endif
10904
10905 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
10906 if (iSegReg != UINT8_MAX)
10907 {
10908 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
10909 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
10910 }
10911
10912 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem */
10913 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem);
10914
10915 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo */
10916 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
10917 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo, true /*fFlushShadows*/);
10918
10919 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
10920 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10921
10922 /* Done setting up parameters, make the call. */
10923 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
10924
10925 /*
10926      * Put the result in the right register.
10927 */
10928 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
10929 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
10930 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
10931 iemNativeVarRegisterRelease(pReNative, idxVarMem);
10932
10933 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
10934
10935 return off;
10936}
10937
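/*
 * The argument setup above (ARG0=pVCpu, ARG1=&bUnmapInfo, ARG2=GCPtrMem,
 * ARG3=iSegReg, return register = mapped pointer) implies helpers of roughly
 * this shape -- a sketch inferred from the register assignments, not copied
 * from the actual helper declarations:
 *
 *      uint16_t *iemNativeHlpMemMapDataU16Wo(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
 *                                            RTGCPTR GCPtrMem, uint8_t iSegReg);
 *
 * The flat variants are invoked without the segment argument; ARG3 is only
 * loaded when iSegReg != UINT8_MAX.
 */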
10938
10939#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
10940 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
10941 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
10942
10943#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
10944 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
10945 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
10946
10947#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
10948 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
10949 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
10950
10951DECL_INLINE_THROW(uint32_t)
10952iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
10953 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
10954{
10955 /*
10956 * Assert sanity.
10957 */
10958 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
10959 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
10960 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
10961 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
10962#ifdef VBOX_STRICT
10963 switch (fAccess & IEM_ACCESS_TYPE_MASK)
10964 {
10965 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
10966 case IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
10967 case IEM_ACCESS_TYPE_READ: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
10968 default: AssertFailed();
10969 }
10970#else
10971 RT_NOREF(fAccess);
10972#endif
10973
10974 /*
10975 * To keep things simple we have to commit any pending writes first as we
10976 * may end up making calls (there shouldn't be any at this point, so this
10977 * is just for consistency).
10978 */
10979 /** @todo we could postpone this till we make the call and reload the
10980 * registers after returning from the call. Not sure if that's sensible or
10981 * not, though. */
10982 off = iemNativeRegFlushPendingWrites(pReNative, off);
10983
10984 /*
10985 * Move/spill/flush stuff out of call-volatile registers.
10986 *
10987 * We exclude any register holding the bUnmapInfo variable, as we'll be
10988 * checking it after returning from the call and will free it afterwards.
10989 */
10990 /** @todo save+restore active registers and maybe guest shadows in miss
10991 * scenario. */
10992 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
10993
10994 /*
10995      * If the value of the bUnmapInfo variable (idxVarUnmapInfo) is zero, we can
10996      * skip all this. Otherwise we'll have to call the unmap helper function.
10997      *
10998      * The likelihood of it being zero is higher than for the TLB hit when doing
10999      * the mapping, as a TLB miss for a well-aligned and unproblematic memory
11000 * access should also end up with a mapping that won't need special unmapping.
11001 */
11002 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
11003 * should speed up things for the pure interpreter as well when TLBs
11004 * are enabled. */
11005#ifdef RT_ARCH_AMD64
11006 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
11007 {
11008 /* test byte [rbp - xxx], 0ffh */
11009 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11010 pbCodeBuf[off++] = 0xf6;
11011 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
11012 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11013 pbCodeBuf[off++] = 0xff;
11014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11015 }
11016 else
11017#endif
11018 {
11019 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
11020 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
11021 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
11022 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
11023 }
11024 uint32_t const offJmpFixup = off;
11025 off = iemNativeEmitJzToFixed(pReNative, off, 0);
11026
11027 /*
11028 * Call the unmap helper function.
11029 */
11030#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
11031 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11032#else
11033 RT_NOREF(idxInstr);
11034#endif
11035
11036 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
11037 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
11038 0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
11039
11040 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11041 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11042
11043 /* Done setting up parameters, make the call. */
11044 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11045
11046     /* The bUnmapInfo variable is implicitly freed by these MCs. */
11047 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
11048
11049 /*
11050 * Done, just fixup the jump for the non-call case.
11051 */
11052 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
11053
11054 return off;
11055}
11056
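/*
 * In C terms the code emitted above amounts to something like the following
 * (sketch; bUnmapInfo stands for the value of the a_bMapInfo variable):
 *
 *      if (bUnmapInfo != 0)
 *          iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);
 *
 * i.e. the helper call is skipped entirely in the common case where the
 * mapping required no special unmapping work.
 */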
11057
11058
11059/*********************************************************************************************************************************
11060* State and Exceptions *
11061*********************************************************************************************************************************/
11062
11063#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11064#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11065
11066#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11067#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11068#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11069
11070#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11071#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
11072#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
11073
11074
11075DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
11076{
11077 /** @todo this needs a lot more work later. */
11078 RT_NOREF(pReNative, fForChange);
11079 return off;
11080}
11081
11082
11083/*********************************************************************************************************************************
11084* The native code generator functions for each MC block. *
11085*********************************************************************************************************************************/
11086
11087
11088/*
11089 * Include g_apfnIemNativeRecompileFunctions and associated functions.
11090 *
11091 * This should probably live in its own file later, but let's see what the
11092 * compile times turn out to be first.
11093 */
11094#include "IEMNativeFunctions.cpp.h"
11095
11096
11097
11098/*********************************************************************************************************************************
11099* Recompiler Core. *
11100*********************************************************************************************************************************/
11101
11102
11103/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
11104static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
11105{
11106 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
11107 pDis->cbCachedInstr += cbMaxRead;
11108 RT_NOREF(cbMinRead);
11109 return VERR_NO_DATA;
11110}
11111
11112
11113/**
11114 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
11115 * @returns pszBuf.
11116 * @param fFlags The flags.
11117 * @param pszBuf The output buffer.
11118 * @param cbBuf The output buffer size. At least 32 bytes.
11119 */
11120DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
11121{
11122 Assert(cbBuf >= 32);
11123 static RTSTRTUPLE const s_aModes[] =
11124 {
11125 /* [00] = */ { RT_STR_TUPLE("16BIT") },
11126 /* [01] = */ { RT_STR_TUPLE("32BIT") },
11127 /* [02] = */ { RT_STR_TUPLE("!2!") },
11128 /* [03] = */ { RT_STR_TUPLE("!3!") },
11129 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
11130 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
11131 /* [06] = */ { RT_STR_TUPLE("!6!") },
11132 /* [07] = */ { RT_STR_TUPLE("!7!") },
11133 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
11134 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
11135 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
11136 /* [0b] = */ { RT_STR_TUPLE("!b!") },
11137 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
11138 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
11139 /* [0e] = */ { RT_STR_TUPLE("!e!") },
11140 /* [0f] = */ { RT_STR_TUPLE("!f!") },
11141 /* [10] = */ { RT_STR_TUPLE("!10!") },
11142 /* [11] = */ { RT_STR_TUPLE("!11!") },
11143 /* [12] = */ { RT_STR_TUPLE("!12!") },
11144 /* [13] = */ { RT_STR_TUPLE("!13!") },
11145 /* [14] = */ { RT_STR_TUPLE("!14!") },
11146 /* [15] = */ { RT_STR_TUPLE("!15!") },
11147 /* [16] = */ { RT_STR_TUPLE("!16!") },
11148 /* [17] = */ { RT_STR_TUPLE("!17!") },
11149 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
11150 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
11151 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
11152 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
11153 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
11154 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
11155 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
11156 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
11157 };
11158 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
11159 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
11160 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
11161
11162 pszBuf[off++] = ' ';
11163 pszBuf[off++] = 'C';
11164 pszBuf[off++] = 'P';
11165 pszBuf[off++] = 'L';
11166 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
11167 Assert(off < 32);
11168
11169 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
11170
11171 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
11172 {
11173 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
11174 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
11175 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
11176 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
11177 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
11178 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
11179 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
11180 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
11181 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
11182 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
11183 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
11184 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
11185 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
11186 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
11187 };
11188 if (fFlags)
11189 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
11190 if (s_aFlags[i].fFlag & fFlags)
11191 {
11192 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
11193 pszBuf[off++] = ' ';
11194 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
11195 off += s_aFlags[i].cchName;
11196 fFlags &= ~s_aFlags[i].fFlag;
11197 if (!fFlags)
11198 break;
11199 }
11200 pszBuf[off] = '\0';
11201
11202 return pszBuf;
11203}
11204
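/*
 * Example use (mirrors the calls made further down in this file):
 *
 *      char szBuf[512];
 *      pHlp->pfnPrintf(pHlp, "fFlags=%#010x %s\n", pTb->fFlags,
 *                      iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf)));
 */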
11205
11206DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
11207{
11208 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
11209#if defined(RT_ARCH_AMD64)
11210 static const char * const a_apszMarkers[] =
11211 {
11212 "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes", "PcAfterBranch",
11213 };
11214#endif
11215
11216 char szDisBuf[512];
11217 DISSTATE Dis;
11218 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
11219 uint32_t const cNative = pTb->Native.cInstructions;
11220 uint32_t offNative = 0;
11221#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11222 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
11223#endif
11224 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
11225 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
11226 : DISCPUMODE_64BIT;
11227#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11228 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
11229#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11230 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
11231#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
11232# error "Port me"
11233#else
11234 csh hDisasm = ~(size_t)0;
11235# if defined(RT_ARCH_AMD64)
11236 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
11237# elif defined(RT_ARCH_ARM64)
11238 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
11239# else
11240# error "Port me"
11241# endif
11242 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
11243#endif
11244
11245 /*
11246 * Print TB info.
11247 */
11248 pHlp->pfnPrintf(pHlp,
11249 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
11250 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
11251 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
11252 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
11253#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11254 if (pDbgInfo && pDbgInfo->cEntries > 1)
11255 {
11256 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
11257
11258 /*
11259          * This disassembly is driven by the debug info, which follows the native
11260          * code and indicates where the next guest instruction starts, where the
11261          * labels are, and other such things.
11262 */
11263 uint32_t idxThreadedCall = 0;
11264 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
11265 uint8_t idxRange = UINT8_MAX;
11266 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
11267 uint32_t offRange = 0;
11268 uint32_t offOpcodes = 0;
11269 uint32_t const cbOpcodes = pTb->cbOpcodes;
11270 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
11271 uint32_t const cDbgEntries = pDbgInfo->cEntries;
11272 uint32_t iDbgEntry = 1;
11273 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
11274
11275 while (offNative < cNative)
11276 {
11277 /* If we're at or have passed the point where the next chunk of debug
11278 info starts, process it. */
11279 if (offDbgNativeNext <= offNative)
11280 {
11281 offDbgNativeNext = UINT32_MAX;
11282 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
11283 {
11284 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
11285 {
11286 case kIemTbDbgEntryType_GuestInstruction:
11287 {
11288 /* Did the exec flag change? */
11289 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
11290 {
11291 pHlp->pfnPrintf(pHlp,
11292 " fExec change %#08x -> %#08x %s\n",
11293 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
11294 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
11295 szDisBuf, sizeof(szDisBuf)));
11296 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
11297 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
11298 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
11299 : DISCPUMODE_64BIT;
11300 }
11301
11302                     /* New opcode range? We need to fend off a spurious debug info entry here for cases
11303 where the compilation was aborted before the opcode was recorded and the actual
11304 instruction was translated to a threaded call. This may happen when we run out
11305 of ranges, or when some complicated interrupts/FFs are found to be pending or
11306 similar. So, we just deal with it here rather than in the compiler code as it
11307 is a lot simpler to do here. */
11308 if ( idxRange == UINT8_MAX
11309 || idxRange >= cRanges
11310 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
11311 {
11312 idxRange += 1;
11313 if (idxRange < cRanges)
11314 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
11315 else
11316 continue;
11317 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
11318 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
11319 + (pTb->aRanges[idxRange].idxPhysPage == 0
11320 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
11321 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
11322 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
11323 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
11324 pTb->aRanges[idxRange].idxPhysPage);
11325 GCPhysPc += offRange;
11326 }
11327
11328 /* Disassemble the instruction. */
11329 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
11330 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offRange, 15);
11331 uint32_t cbInstr = 1;
11332 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
11333 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
11334 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
11335 if (RT_SUCCESS(rc))
11336 {
11337 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11338 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11339 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11340 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11341
11342 static unsigned const s_offMarker = 55;
11343 static char const s_szMarker[] = " ; <--- guest";
11344 if (cch < s_offMarker)
11345 {
11346 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
11347 cch = s_offMarker;
11348 }
11349 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
11350 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
11351
11352 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
11353 }
11354 else
11355 {
11356 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
11357 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
11358 cbInstr = 1;
11359 }
11360 GCPhysPc += cbInstr;
11361 offOpcodes += cbInstr;
11362 offRange += cbInstr;
11363 continue;
11364 }
11365
11366 case kIemTbDbgEntryType_ThreadedCall:
11367 pHlp->pfnPrintf(pHlp,
11368 " Call #%u to %s (%u args) - %s\n",
11369 idxThreadedCall,
11370 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
11371 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
11372 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
11373 idxThreadedCall++;
11374 continue;
11375
11376 case kIemTbDbgEntryType_GuestRegShadowing:
11377 {
11378 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
11379 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
11380 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
11381 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
11382 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
11383 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
11384 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
11385 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
11386 else
11387 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
11388 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
11389 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
11390 continue;
11391 }
11392
11393 case kIemTbDbgEntryType_Label:
11394 {
11395 const char *pszName = "what_the_fudge";
11396 const char *pszComment = "";
11397 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
11398 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
11399 {
11400 case kIemNativeLabelType_Return:
11401 pszName = "Return";
11402 break;
11403 case kIemNativeLabelType_ReturnBreak:
11404 pszName = "ReturnBreak";
11405 break;
11406 case kIemNativeLabelType_ReturnWithFlags:
11407 pszName = "ReturnWithFlags";
11408 break;
11409 case kIemNativeLabelType_NonZeroRetOrPassUp:
11410 pszName = "NonZeroRetOrPassUp";
11411 break;
11412 case kIemNativeLabelType_RaiseGp0:
11413 pszName = "RaiseGp0";
11414 break;
11415 case kIemNativeLabelType_ObsoleteTb:
11416 pszName = "ObsoleteTb";
11417 break;
11418 case kIemNativeLabelType_NeedCsLimChecking:
11419 pszName = "NeedCsLimChecking";
11420 break;
11421 case kIemNativeLabelType_CheckBranchMiss:
11422 pszName = "CheckBranchMiss";
11423 break;
11424 case kIemNativeLabelType_If:
11425 pszName = "If";
11426 fNumbered = true;
11427 break;
11428 case kIemNativeLabelType_Else:
11429 pszName = "Else";
11430 fNumbered = true;
11431 pszComment = " ; regs state restored pre-if-block";
11432 break;
11433 case kIemNativeLabelType_Endif:
11434 pszName = "Endif";
11435 fNumbered = true;
11436 break;
11437 case kIemNativeLabelType_CheckIrq:
11438 pszName = "CheckIrq_CheckVM";
11439 fNumbered = true;
11440 break;
11441 case kIemNativeLabelType_TlbMiss:
11442 pszName = "TlbMiss";
11443 fNumbered = true;
11444 break;
11445 case kIemNativeLabelType_TlbDone:
11446 pszName = "TlbDone";
11447 fNumbered = true;
11448 break;
11449 case kIemNativeLabelType_Invalid:
11450 case kIemNativeLabelType_End:
11451 break;
11452 }
11453 if (fNumbered)
11454 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
11455 else
11456 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
11457 continue;
11458 }
11459
11460 case kIemTbDbgEntryType_NativeOffset:
11461 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
11462 Assert(offDbgNativeNext > offNative);
11463 break;
11464
11465 default:
11466 AssertFailed();
11467 }
11468 iDbgEntry++;
11469 break;
11470 }
11471 }
11472
11473 /*
11474 * Disassemble the next native instruction.
11475 */
11476 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
11477# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
11478 uint32_t cbInstr = sizeof(paNative[0]);
11479 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
11480 if (RT_SUCCESS(rc))
11481 {
11482# if defined(RT_ARCH_AMD64)
11483 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
11484 {
11485 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
11486 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
11487 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
11488 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
11489 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
11490 uInfo & 0x8000 ? "recompiled" : "todo");
11491 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
11492 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
11493 else
11494 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
11495 }
11496 else
11497# endif
11498 {
11499# ifdef RT_ARCH_AMD64
11500 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11501 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11502 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11503 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11504# elif defined(RT_ARCH_ARM64)
11505 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
11506 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11507 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11508# else
11509# error "Port me"
11510# endif
11511 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
11512 }
11513 }
11514 else
11515 {
11516# if defined(RT_ARCH_AMD64)
11517 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
11518 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
11519# elif defined(RT_ARCH_ARM64)
11520 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
11521# else
11522# error "Port me"
11523# endif
11524 cbInstr = sizeof(paNative[0]);
11525 }
11526 offNative += cbInstr / sizeof(paNative[0]);
11527
11528# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
11529 cs_insn *pInstr;
11530 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
11531 (uintptr_t)pNativeCur, 1, &pInstr);
11532 if (cInstrs > 0)
11533 {
11534 Assert(cInstrs == 1);
11535# if defined(RT_ARCH_AMD64)
11536 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
11537 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
11538# else
11539 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
11540 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
11541# endif
11542 offNative += pInstr->size / sizeof(*pNativeCur);
11543 cs_free(pInstr, cInstrs);
11544 }
11545 else
11546 {
11547# if defined(RT_ARCH_AMD64)
11548 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
11549                                     pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
11550# else
11551 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
11552# endif
11553 offNative++;
11554 }
11555# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
11556 }
11557 }
11558 else
11559#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
11560 {
11561 /*
11562 * No debug info, just disassemble the x86 code and then the native code.
11563 *
11564 * First the guest code:
11565 */
11566 for (unsigned i = 0; i < pTb->cRanges; i++)
11567 {
11568 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
11569 + (pTb->aRanges[i].idxPhysPage == 0
11570 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
11571 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
11572 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
11573 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
11574 unsigned off = pTb->aRanges[i].offOpcodes;
11575 /** @todo this ain't working when crossing pages! */
11576 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
11577 while (off < cbOpcodes)
11578 {
11579 uint32_t cbInstr = 1;
11580 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
11581 &pTb->pabOpcodes[off], cbOpcodes - off,
11582 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
11583 if (RT_SUCCESS(rc))
11584 {
11585 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11586 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11587 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11588 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11589 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
11590 GCPhysPc += cbInstr;
11591 off += cbInstr;
11592 }
11593 else
11594 {
11595 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
11596 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
11597 break;
11598 }
11599 }
11600 }
11601
11602 /*
11603 * Then the native code:
11604 */
11605 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
11606 while (offNative < cNative)
11607 {
11608 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
11609# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
11610 uint32_t cbInstr = sizeof(paNative[0]);
11611 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
11612 if (RT_SUCCESS(rc))
11613 {
11614# if defined(RT_ARCH_AMD64)
11615 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
11616 {
11617 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
11618 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
11619 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
11620 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
11621 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
11622 uInfo & 0x8000 ? "recompiled" : "todo");
11623 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
11624 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
11625 else
11626 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
11627 }
11628 else
11629# endif
11630 {
11631# ifdef RT_ARCH_AMD64
11632 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
11633 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
11634 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11635 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11636# elif defined(RT_ARCH_ARM64)
11637 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
11638 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
11639 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
11640# else
11641# error "Port me"
11642# endif
11643 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
11644 }
11645 }
11646 else
11647 {
11648# if defined(RT_ARCH_AMD64)
11649 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
11650 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
11651# else
11652 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
11653# endif
11654 cbInstr = sizeof(paNative[0]);
11655 }
11656 offNative += cbInstr / sizeof(paNative[0]);
11657
11658# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
11659 cs_insn *pInstr;
11660 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
11661 (uintptr_t)pNativeCur, 1, &pInstr);
11662 if (cInstrs > 0)
11663 {
11664 Assert(cInstrs == 1);
11665# if defined(RT_ARCH_AMD64)
11666 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
11667 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
11668# else
11669 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
11670 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
11671# endif
11672 offNative += pInstr->size / sizeof(*pNativeCur);
11673 cs_free(pInstr, cInstrs);
11674 }
11675 else
11676 {
11677# if defined(RT_ARCH_AMD64)
11678 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
11679                                     pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
11680# else
11681 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
11682# endif
11683 offNative++;
11684 }
11685# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
11686 }
11687 }
11688
11689#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
11690 /* Cleanup. */
11691 cs_close(&hDisasm);
11692#endif
11693}
11694
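/*
 * Example use (same call as the LogIs3Enabled() path at the bottom of this file):
 *
 *      iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
 */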
11695
11696/**
11697 * Recompiles the given threaded TB into a native one.
11698 *
11699 * In case of failure the translation block will be returned as-is.
11700 *
11701 * @returns pTb.
11702 * @param pVCpu The cross context virtual CPU structure of the calling
11703 * thread.
11704 * @param pTb The threaded translation to recompile to native.
11705 */
11706DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
11707{
11708 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
11709
11710 /*
11711     * The first time through, we allocate the recompiler state; the other times
11712 * we just need to reset it before using it again.
11713 */
11714 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
11715 if (RT_LIKELY(pReNative))
11716 iemNativeReInit(pReNative, pTb);
11717 else
11718 {
11719 pReNative = iemNativeInit(pVCpu, pTb);
11720 AssertReturn(pReNative, pTb);
11721 }
11722
11723 /*
11724 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
11725 * for aborting if an error happens.
11726 */
11727 uint32_t cCallsLeft = pTb->Thrd.cCalls;
11728#ifdef LOG_ENABLED
11729 uint32_t const cCallsOrg = cCallsLeft;
11730#endif
11731 uint32_t off = 0;
11732 int rc = VINF_SUCCESS;
11733 IEMNATIVE_TRY_SETJMP(pReNative, rc)
11734 {
11735 /*
11736 * Emit prolog code (fixed).
11737 */
11738 off = iemNativeEmitProlog(pReNative, off);
11739
11740 /*
11741 * Convert the calls to native code.
11742 */
11743#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11744 int32_t iGstInstr = -1;
11745#endif
11746#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
11747 uint32_t cThreadedCalls = 0;
11748 uint32_t cRecompiledCalls = 0;
11749#endif
11750 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
11751 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
11752 while (cCallsLeft-- > 0)
11753 {
11754 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
11755
11756 /*
11757 * Debug info and assembly markup.
11758 */
11759 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
11760 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
11761#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11762 iemNativeDbgInfoAddNativeOffset(pReNative, off);
11763 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
11764 {
11765 if (iGstInstr < (int32_t)pTb->cInstructions)
11766 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
11767 else
11768 Assert(iGstInstr == pTb->cInstructions);
11769 iGstInstr = pCallEntry->idxInstr;
11770 }
11771 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
11772#endif
11773#if defined(VBOX_STRICT)
11774 off = iemNativeEmitMarker(pReNative, off,
11775 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
11776 pCallEntry->enmFunction));
11777#endif
11778#if defined(VBOX_STRICT)
11779 iemNativeRegAssertSanity(pReNative);
11780#endif
11781
11782 /*
11783 * Actual work.
11784 */
11785 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
11786 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
11787 if (pfnRecom) /** @todo stats on this. */
11788 {
11789 off = pfnRecom(pReNative, off, pCallEntry);
11790 STAM_REL_STATS({cRecompiledCalls++;});
11791 }
11792 else
11793 {
11794 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
11795 STAM_REL_STATS({cThreadedCalls++;});
11796 }
11797 Assert(off <= pReNative->cInstrBufAlloc);
11798 Assert(pReNative->cCondDepth == 0);
11799
11800 /*
11801 * Advance.
11802 */
11803 pCallEntry++;
11804 }
11805
11806 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
11807 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
11808 if (!cThreadedCalls)
11809 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
11810
11811 /*
11812 * Emit the epilog code.
11813 */
11814 uint32_t idxReturnLabel;
11815 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
11816
11817 /*
11818 * Generate special jump labels.
11819 */
11820 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
11821 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
11822 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
11823 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
11824 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
11825 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
11826 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
11827 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
11828 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
11829 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
11830 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
11831 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
11832 }
11833 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
11834 {
11835 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
11836 return pTb;
11837 }
11838 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
11839 Assert(off <= pReNative->cInstrBufAlloc);
11840
11841 /*
11842     * Make sure all labels have been defined.
11843 */
11844 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
11845#ifdef VBOX_STRICT
11846 uint32_t const cLabels = pReNative->cLabels;
11847 for (uint32_t i = 0; i < cLabels; i++)
11848 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
11849#endif
11850
11851 /*
11852 * Allocate executable memory, copy over the code we've generated.
11853 */
11854 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
11855 if (pTbAllocator->pDelayedFreeHead)
11856 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
11857
11858 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
11859 AssertReturn(paFinalInstrBuf, pTb);
11860 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
11861
11862 /*
11863 * Apply fixups.
11864 */
11865 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
11866 uint32_t const cFixups = pReNative->cFixups;
11867 for (uint32_t i = 0; i < cFixups; i++)
11868 {
11869 Assert(paFixups[i].off < off);
11870 Assert(paFixups[i].idxLabel < cLabels);
11871 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
11872 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
11873 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
11874 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
11875 switch (paFixups[i].enmType)
11876 {
11877#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
11878 case kIemNativeFixupType_Rel32:
11879 Assert(paFixups[i].off + 4 <= off);
11880 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
11881 continue;
11882
11883#elif defined(RT_ARCH_ARM64)
11884 case kIemNativeFixupType_RelImm26At0:
11885 {
11886 Assert(paFixups[i].off < off);
11887 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
11888 Assert(offDisp >= -262144 && offDisp < 262144);
11889 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
11890 continue;
11891 }
11892
11893 case kIemNativeFixupType_RelImm19At5:
11894 {
11895 Assert(paFixups[i].off < off);
11896 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
11897 Assert(offDisp >= -262144 && offDisp < 262144);
11898 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
11899 continue;
11900 }
11901
11902 case kIemNativeFixupType_RelImm14At5:
11903 {
11904 Assert(paFixups[i].off < off);
11905 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
11906 Assert(offDisp >= -8192 && offDisp < 8192);
11907 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
11908 continue;
11909 }
11910
11911#endif
11912 case kIemNativeFixupType_Invalid:
11913 case kIemNativeFixupType_End:
11914 break;
11915 }
11916 AssertFailed();
11917 }
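    /*
     * Worked example of the Rel32 case with made-up numbers: a label at native
     * offset 0x40, a fixup at offset 0x10 and offAddend = -4 (hypothetical)
     * store the displacement 0x40 - 0x10 + (-4) = 0x2c.
     */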
11918
11919 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
11920 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
11921
11922 /*
11923 * Convert the translation block.
11924 */
11925 RTMemFree(pTb->Thrd.paCalls);
11926 pTb->Native.paInstructions = paFinalInstrBuf;
11927 pTb->Native.cInstructions = off;
11928 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
11929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
11930 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
11931 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
11932#endif
11933
11934 Assert(pTbAllocator->cThreadedTbs > 0);
11935 pTbAllocator->cThreadedTbs -= 1;
11936 pTbAllocator->cNativeTbs += 1;
11937 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
11938
11939#ifdef LOG_ENABLED
11940 /*
11941 * Disassemble to the log if enabled.
11942 */
11943 if (LogIs3Enabled())
11944 {
11945 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
11946 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
11947# ifdef DEBUG_bird
11948 RTLogFlush(NULL);
11949# endif
11950 }
11951#endif
11952 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
11953
11954 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
11955 return pTb;
11956}
11957
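/*
 * Call-site sketch (the surrounding condition is illustrative only):
 *
 *      if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED)
 *          pTb = iemNativeRecompile(pVCpu, pTb);
 */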