1/* $Id: IEMAllN8veRecompiler.cpp 102397 2023-11-30 13:53:54Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef VBOX_STRICT
132static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
133 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
134static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
135#endif
136#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
137static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
138static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
139#endif
140DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
142 IEMNATIVEGSTREG enmGstReg, uint32_t off);
143
144static uint8_t iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store its internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
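/* Worked example (illustrative only, not part of the allocator): a 200 byte
 * request rounds up to cReqUnits = (200 + 127) >> 7 = 2 units, i.e. 256 bytes.
 * With a 64 MiB chunk (the default for large cbMax, see iemExecMemAllocatorInit
 * below) that gives 64M / 128 = 524288 units per chunk, tracked by
 * 524288 / 64 = 8192 uint64_t bitmap elements, i.e. 64 KiB of allocation
 * bitmap per chunk. */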
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Critical section serializing updates to the GDB JIT descriptor. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
269/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity/laziness they are allocated as one continuous
345 * block. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to an chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
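/* Worked example (illustrative): with cReqUnits = 2, idxFirst = 0 and an
 * allocation bitmap whose first byte is 0x09 (units 0 and 3 taken),
 * ASMBitFirstClear returns 1, unit 2 also tests clear, so bits 1 and 2 are
 * set and the sub-allocation starts at unit 1, i.e. 128 bytes into the
 * chunk. */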
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516 /*
517 * Adjust the request size so it'll fit the allocator alignment/whatnot.
518 *
519 * For the RTHeapSimple allocator this means to follow the logic described
520 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
521 * existing chunks if we think we've got sufficient free memory around.
522 *
523 * For the alternative allocator, we just align it up to a whole unit size.
524 */
525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
526 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
527#else
528 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
529#endif
530 if (cbReq <= pExecMemAllocator->cbFree)
531 {
532 uint32_t const cChunks = pExecMemAllocator->cChunks;
533 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
534 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
535 {
536 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
537 if (pvRet)
538 return pvRet;
539 }
540 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
541 {
542 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
543 if (pvRet)
544 return pvRet;
545 }
546 }
547
548 /*
549 * Can we grow it with another chunk?
550 */
551 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
552 {
553 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
554 AssertLogRelRCReturn(rc, NULL);
555
556 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
557 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
558 if (pvRet)
559 return pvRet;
560 AssertFailed();
561 }
562
563 /* What now? Prune native translation blocks from the cache? */
564 AssertFailed();
565 return NULL;
566}
567
568
569/** This is a hook that we may need later for changing memory protection back
570 * to readonly+exec */
571static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
572{
573#ifdef RT_OS_DARWIN
574 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
575 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
576 AssertRC(rc); RT_NOREF(pVCpu);
577
578 /*
579 * Flush the instruction cache:
580 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
581 */
582 /* sys_dcache_flush(pv, cb); - not necessary */
583 sys_icache_invalidate(pv, cb);
584#else
585 RT_NOREF(pVCpu, pv, cb);
586#endif
587}
588
589
590/**
591 * Frees executable memory.
592 */
593void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
594{
595 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
596 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
597 Assert(pv);
598#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
599 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
600#else
601 Assert(!((uintptr_t)pv & 63));
602#endif
603
604 /* Align the size as we did when allocating the block. */
605#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
606 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
607#else
608 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
609#endif
610
611 /* Free it / assert sanity. */
612#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
613 uint32_t const cChunks = pExecMemAllocator->cChunks;
614 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
615 bool fFound = false;
616 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
617 {
618 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
619 fFound = offChunk < cbChunk;
620 if (fFound)
621 {
622#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
623 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
624 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
625
626 /* Check that it's valid and free it. */
627 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
628 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
629 for (uint32_t i = 1; i < cReqUnits; i++)
630 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
631 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
632
633 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
634 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
635
636 /* Update the stats. */
637 pExecMemAllocator->cbAllocated -= cb;
638 pExecMemAllocator->cbFree += cb;
639 pExecMemAllocator->cAllocations -= 1;
640 return;
641#else
642 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
643 break;
644#endif
645 }
646 }
647# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
648 AssertFailed();
649# else
650 Assert(fFound);
651# endif
652#endif
653
654#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
655 /* Update stats while cb is freshly calculated. */
656 pExecMemAllocator->cbAllocated -= cb;
657 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
658 pExecMemAllocator->cAllocations -= 1;
659
660 /* Free it. */
661 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
662#endif
663}
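/*
 * Illustrative usage sketch (hypothetical caller; abCode and cbCode are
 * made-up names, and this is not how the recompiler below drives the
 * allocator, merely the expected call order):
 *
 *      void *pvExec = iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *      if (pvExec)
 *      {
 *          memcpy(pvExec, abCode, cbCode);                         // pages are RW here (see darwin notes above)
 *          iemExecMemAllocatorReadyForUse(pVCpu, pvExec, cbCode);  // darwin: flip to RX + invalidate icache
 *          // ... execute ...
 *          iemExecMemAllocatorFree(pVCpu, pvExec, cbCode);         // returns the units to the chunk bitmap
 *      }
 */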
664
665
666
667#ifdef IN_RING3
668# ifdef RT_OS_WINDOWS
669
670/**
671 * Initializes the unwind info structures for windows hosts.
672 */
673static int
674iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
675 void *pvChunk, uint32_t idxChunk)
676{
677 RT_NOREF(pVCpu);
678
679 /*
680 * The AMD64 unwind opcodes.
681 *
682 * This is a program that starts with RSP after a RET instruction that
683 * ends up in recompiled code, and the operations we describe here will
684 * restore all non-volatile registers and bring RSP back to where our
685 * RET address is. This means it's reverse order from what happens in
686 * the prologue.
687 *
688 * Note! Using a frame register approach here, partly because we have one,
689 * but mainly because the UWOP_ALLOC_LARGE argument values
690 * would be a pain to write initializers for. On the positive
691 * side, we're impervious to changes in the stack variable
692 * area and can deal with dynamic stack allocations if necessary.
693 */
694 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
695 {
696 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
697 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
698 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
699 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
700 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
701 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
702 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
703 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
704 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
705 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
706 };
707 union
708 {
709 IMAGE_UNWIND_INFO Info;
710 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
711 } s_UnwindInfo =
712 {
713 {
714 /* .Version = */ 1,
715 /* .Flags = */ 0,
716 /* .SizeOfProlog = */ 16, /* whatever */
717 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
718 /* .FrameRegister = */ X86_GREG_xBP,
719 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
720 }
721 };
722 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
723 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
724
725 /*
726 * Calc how much space we need and allocate it off the exec heap.
727 */
728 unsigned const cFunctionEntries = 1;
729 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
730 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
731# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
732 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
733 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
734 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
735# else
736 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
737 - pExecMemAllocator->cbHeapBlockHdr;
738 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
739 32 /*cbAlignment*/);
740# endif
741 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
742 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
743
744 /*
745 * Initialize the structures.
746 */
747 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
748
749 paFunctions[0].BeginAddress = 0;
750 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
751 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
752
753 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
754 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
755
756 /*
757 * Register it.
758 */
759 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
760 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
761
762 return VINF_SUCCESS;
763}
764
765
766# else /* !RT_OS_WINDOWS */
767
768/**
769 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
770 */
771DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
772{
773 if (iValue >= 64)
774 {
775 Assert(iValue < 0x2000);
776 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
777 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
778 }
779 else if (iValue >= 0)
780 *Ptr.pb++ = (uint8_t)iValue;
781 else if (iValue > -64)
782 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
783 else
784 {
785 Assert(iValue > -0x2000);
786 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
787 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
788 }
789 return Ptr;
790}
791
792
793/**
794 * Emits an ULEB128 encoded value (up to 64-bit wide).
795 */
796DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
797{
798 while (uValue >= 0x80)
799 {
800 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
801 uValue >>= 7;
802 }
803 *Ptr.pb++ = (uint8_t)uValue;
804 return Ptr;
805}
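/* Encoding examples (illustrative): iemDwarfPutUleb128(Ptr, 300) emits the two
 * bytes 0xac 0x02 (300 = 0x12c: low seven bits 0x2c with the continuation bit
 * set, then 0x02), while iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78
 * ((-8 & 0x3f) with bit 6 set to mark the value as negative). */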
806
807
808/**
809 * Emits a CFA rule as register @a uReg + offset @a off.
810 */
811DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
812{
813 *Ptr.pb++ = DW_CFA_def_cfa;
814 Ptr = iemDwarfPutUleb128(Ptr, uReg);
815 Ptr = iemDwarfPutUleb128(Ptr, off);
816 return Ptr;
817}
818
819
820/**
821 * Emits a register (@a uReg) save location:
822 * CFA + @a off * data_alignment_factor
823 */
824DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
825{
826 if (uReg < 0x40)
827 *Ptr.pb++ = DW_CFA_offset | uReg;
828 else
829 {
830 *Ptr.pb++ = DW_CFA_offset_extended;
831 Ptr = iemDwarfPutUleb128(Ptr, uReg);
832 }
833 Ptr = iemDwarfPutUleb128(Ptr, off);
834 return Ptr;
835}
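/* Byte-level example (illustrative; assumes the usual System V AMD64 DWARF
 * numbering where RBP is register 6): iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16)
 * emits 0x0c 0x06 0x10 (DW_CFA_def_cfa, reg, offset), and
 * iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits 0x86 0x02
 * (DW_CFA_offset | 6, factored offset 2, i.e. RBP saved at CFA + 2 * -8 with
 * the -8 data alignment factor from the CIE below). */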
836
837
838# if 0 /* unused */
839/**
840 * Emits a register (@a uReg) save location, using signed offset:
841 * CFA + @a offSigned * data_alignment_factor
842 */
843DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
844{
845 *Ptr.pb++ = DW_CFA_offset_extended_sf;
846 Ptr = iemDwarfPutUleb128(Ptr, uReg);
847 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
848 return Ptr;
849}
850# endif
851
852
853/**
854 * Initializes the unwind info section for non-windows hosts.
855 */
856static int
857iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
858 void *pvChunk, uint32_t idxChunk)
859{
860 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
861 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
862
863 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
864
865 /*
866 * Generate the CIE first.
867 */
868# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
869 uint8_t const iDwarfVer = 3;
870# else
871 uint8_t const iDwarfVer = 4;
872# endif
873 RTPTRUNION const PtrCie = Ptr;
874 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
875 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
876 *Ptr.pb++ = iDwarfVer; /* DWARF version */
877 *Ptr.pb++ = 0; /* Augmentation. */
878 if (iDwarfVer >= 4)
879 {
880 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
881 *Ptr.pb++ = 0; /* Segment selector size. */
882 }
883# ifdef RT_ARCH_AMD64
884 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
885# else
886 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
887# endif
888 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
891# elif defined(RT_ARCH_ARM64)
892 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
893# else
894# error "port me"
895# endif
896 /* Initial instructions: */
897# ifdef RT_ARCH_AMD64
898 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
899 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
906# elif defined(RT_ARCH_ARM64)
907# if 1
908 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
909# else
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
911# endif
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
924 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
925 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
926# else
927# error "port me"
928# endif
929 while ((Ptr.u - PtrCie.u) & 3)
930 *Ptr.pb++ = DW_CFA_nop;
931 /* Finalize the CIE size. */
932 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
933
934 /*
935 * Generate an FDE for the whole chunk area.
936 */
937# ifdef IEMNATIVE_USE_LIBUNWIND
938 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
939# endif
940 RTPTRUNION const PtrFde = Ptr;
941 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
942 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
943 Ptr.pu32++;
944 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
945 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
946# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
947 *Ptr.pb++ = DW_CFA_nop;
948# endif
949 while ((Ptr.u - PtrFde.u) & 3)
950 *Ptr.pb++ = DW_CFA_nop;
951 /* Finalize the FDE size. */
952 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
953
954 /* Terminator entry. */
955 *Ptr.pu32++ = 0;
956 *Ptr.pu32++ = 0; /* just to be sure... */
957 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
958
959 /*
960 * Register it.
961 */
962# ifdef IEMNATIVE_USE_LIBUNWIND
963 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
964# else
965 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
966 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
967# endif
968
969# ifdef IEMNATIVE_USE_GDB_JIT
970 /*
971 * Now for telling GDB about this (experimental).
972 *
973 * This seems to work best with ET_DYN.
974 */
975 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
976# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
977 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
978 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
979# else
980 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
981 - pExecMemAllocator->cbHeapBlockHdr;
982 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
983# endif
984 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
985 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
986
987 RT_ZERO(*pSymFile);
988
989 /*
990 * The ELF header:
991 */
992 pSymFile->EHdr.e_ident[0] = ELFMAG0;
993 pSymFile->EHdr.e_ident[1] = ELFMAG1;
994 pSymFile->EHdr.e_ident[2] = ELFMAG2;
995 pSymFile->EHdr.e_ident[3] = ELFMAG3;
996 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
997 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
998 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
999 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1000# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1001 pSymFile->EHdr.e_type = ET_DYN;
1002# else
1003 pSymFile->EHdr.e_type = ET_REL;
1004# endif
1005# ifdef RT_ARCH_AMD64
1006 pSymFile->EHdr.e_machine = EM_AMD64;
1007# elif defined(RT_ARCH_ARM64)
1008 pSymFile->EHdr.e_machine = EM_AARCH64;
1009# else
1010# error "port me"
1011# endif
1012 pSymFile->EHdr.e_version = 1; /*?*/
1013 pSymFile->EHdr.e_entry = 0;
1014# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1015 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1016# else
1017 pSymFile->EHdr.e_phoff = 0;
1018# endif
1019 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1020 pSymFile->EHdr.e_flags = 0;
1021 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1022# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1023 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1024 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1025# else
1026 pSymFile->EHdr.e_phentsize = 0;
1027 pSymFile->EHdr.e_phnum = 0;
1028# endif
1029 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1030 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1031 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1032
1033 uint32_t offStrTab = 0;
1034#define APPEND_STR(a_szStr) do { \
1035 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1036 offStrTab += sizeof(a_szStr); \
1037 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1038 } while (0)
1039#define APPEND_STR_FMT(a_szStr, ...) do { \
1040 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1041 offStrTab++; \
1042 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1043 } while (0)
1044
1045 /*
1046 * Section headers.
1047 */
1048 /* Section header #0: NULL */
1049 unsigned i = 0;
1050 APPEND_STR("");
1051 RT_ZERO(pSymFile->aShdrs[i]);
1052 i++;
1053
1054 /* Section header: .eh_frame */
1055 pSymFile->aShdrs[i].sh_name = offStrTab;
1056 APPEND_STR(".eh_frame");
1057 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1058 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1059# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1060 pSymFile->aShdrs[i].sh_offset
1061 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1062# else
1063 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1064 pSymFile->aShdrs[i].sh_offset = 0;
1065# endif
1066
1067 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1068 pSymFile->aShdrs[i].sh_link = 0;
1069 pSymFile->aShdrs[i].sh_info = 0;
1070 pSymFile->aShdrs[i].sh_addralign = 1;
1071 pSymFile->aShdrs[i].sh_entsize = 0;
1072 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1073 i++;
1074
1075 /* Section header: .shstrtab */
1076 unsigned const iShStrTab = i;
1077 pSymFile->EHdr.e_shstrndx = iShStrTab;
1078 pSymFile->aShdrs[i].sh_name = offStrTab;
1079 APPEND_STR(".shstrtab");
1080 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1081 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1082# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1083 pSymFile->aShdrs[i].sh_offset
1084 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1085# else
1086 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1087 pSymFile->aShdrs[i].sh_offset = 0;
1088# endif
1089 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1090 pSymFile->aShdrs[i].sh_link = 0;
1091 pSymFile->aShdrs[i].sh_info = 0;
1092 pSymFile->aShdrs[i].sh_addralign = 1;
1093 pSymFile->aShdrs[i].sh_entsize = 0;
1094 i++;
1095
1096 /* Section header: .symtab */
1097 pSymFile->aShdrs[i].sh_name = offStrTab;
1098 APPEND_STR(".symtab");
1099 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1100 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1101 pSymFile->aShdrs[i].sh_offset
1102 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1103 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1104 pSymFile->aShdrs[i].sh_link = iShStrTab;
1105 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1106 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1107 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1108 i++;
1109
1110# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1111 /* Section header: .dynsym */
1112 pSymFile->aShdrs[i].sh_name = offStrTab;
1113 APPEND_STR(".dynsym");
1114 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1115 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1116 pSymFile->aShdrs[i].sh_offset
1117 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1118 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1119 pSymFile->aShdrs[i].sh_link = iShStrTab;
1120 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1121 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1122 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1123 i++;
1124# endif
1125
1126# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1127 /* Section header: .dynamic */
1128 pSymFile->aShdrs[i].sh_name = offStrTab;
1129 APPEND_STR(".dynamic");
1130 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1131 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1134 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1135 pSymFile->aShdrs[i].sh_link = iShStrTab;
1136 pSymFile->aShdrs[i].sh_info = 0;
1137 pSymFile->aShdrs[i].sh_addralign = 1;
1138 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1139 i++;
1140# endif
1141
1142 /* Section header: .text */
1143 unsigned const iShText = i;
1144 pSymFile->aShdrs[i].sh_name = offStrTab;
1145 APPEND_STR(".text");
1146 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1147 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1148# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1149 pSymFile->aShdrs[i].sh_offset
1150 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1151# else
1152 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1153 pSymFile->aShdrs[i].sh_offset = 0;
1154# endif
1155 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1156 pSymFile->aShdrs[i].sh_link = 0;
1157 pSymFile->aShdrs[i].sh_info = 0;
1158 pSymFile->aShdrs[i].sh_addralign = 1;
1159 pSymFile->aShdrs[i].sh_entsize = 0;
1160 i++;
1161
1162 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1163
1164# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1165 /*
1166 * The program headers:
1167 */
1168 /* Everything in a single LOAD segment: */
1169 i = 0;
1170 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1171 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1172 pSymFile->aPhdrs[i].p_offset
1173 = pSymFile->aPhdrs[i].p_vaddr
1174 = pSymFile->aPhdrs[i].p_paddr = 0;
1175 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1176 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1177 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1178 i++;
1179 /* The .dynamic segment. */
1180 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1181 pSymFile->aPhdrs[i].p_flags = PF_R;
1182 pSymFile->aPhdrs[i].p_offset
1183 = pSymFile->aPhdrs[i].p_vaddr
1184 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1185 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1186 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1187 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1188 i++;
1189
1190 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1191
1192 /*
1193 * The dynamic section:
1194 */
1195 i = 0;
1196 pSymFile->aDyn[i].d_tag = DT_SONAME;
1197 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1198 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1199 i++;
1200 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1201 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1202 i++;
1203 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1204 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_NULL;
1213 i++;
1214 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1215# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1216
1217 /*
1218 * Symbol tables:
1219 */
1220 /** @todo gdb doesn't seem to really like this ... */
1221 i = 0;
1222 pSymFile->aSymbols[i].st_name = 0;
1223 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1224 pSymFile->aSymbols[i].st_value = 0;
1225 pSymFile->aSymbols[i].st_size = 0;
1226 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1227 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1228# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1229 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1230# endif
1231 i++;
1232
1233 pSymFile->aSymbols[i].st_name = 0;
1234 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1235 pSymFile->aSymbols[i].st_value = 0;
1236 pSymFile->aSymbols[i].st_size = 0;
1237 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1238 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1239 i++;
1240
1241 pSymFile->aSymbols[i].st_name = offStrTab;
1242 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1243# if 0
1244 pSymFile->aSymbols[i].st_shndx = iShText;
1245 pSymFile->aSymbols[i].st_value = 0;
1246# else
1247 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1248 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1249# endif
1250 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1251 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1252 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1253# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1254 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1255 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1256# endif
1257 i++;
1258
1259 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1260 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1261
1262 /*
1263 * The GDB JIT entry and informing GDB.
1264 */
1265 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1266# if 1
1267 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1268# else
1269 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1270# endif
1271
1272 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1273 RTCritSectEnter(&g_IemNativeGdbJitLock);
1274 pEhFrame->GdbJitEntry.pNext = NULL;
1275 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1276 if (__jit_debug_descriptor.pTail)
1277 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1278 else
1279 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1280 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1281 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1282
1283 /* Notify GDB: */
1284 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1285 __jit_debug_register_code();
1286 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1287 RTCritSectLeave(&g_IemNativeGdbJitLock);
1288
1289# else /* !IEMNATIVE_USE_GDB_JIT */
1290 RT_NOREF(pVCpu);
1291# endif /* !IEMNATIVE_USE_GDB_JIT */
1292
1293 return VINF_SUCCESS;
1294}
1295
1296# endif /* !RT_OS_WINDOWS */
1297#endif /* IN_RING3 */
1298
1299
1300/**
1301 * Adds another chunk to the executable memory allocator.
1302 *
1303 * This is used by the init code for the initial allocation and later by the
1304 * regular allocator function when it's out of memory.
1305 */
1306static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1307{
1308 /* Check that we've room for growth. */
1309 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1310 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1311
1312 /* Allocate a chunk. */
1313#ifdef RT_OS_DARWIN
1314 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1315#else
1316 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1317#endif
1318 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1319
1320#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1321 int rc = VINF_SUCCESS;
1322#else
1323 /* Initialize the heap for the chunk. */
1324 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1325 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1326 AssertRC(rc);
1327 if (RT_SUCCESS(rc))
1328 {
1329 /*
1330 * We want the memory to be aligned on 64 byte, so the first time thru
1331 * here we do some exploratory allocations to see how we can achieve this.
1332 * On subsequent runs we only make an initial adjustment allocation, if
1333 * necessary.
1334 *
1335 * Since we own the heap implementation, we know that the internal block
1336 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1337 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1338 * to the size, align up by 64 bytes, and subtract 32 bytes.
1339 *
1340 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1341 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1342 * allocation to force subsequent allocations to return 64 byte aligned
1343 * user areas.
1344 */
1345 if (!pExecMemAllocator->cbHeapBlockHdr)
1346 {
1347 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1348 pExecMemAllocator->cbHeapAlignTweak = 64;
1349 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1350 32 /*cbAlignment*/);
1351 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1352
1353 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1354 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1355 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1356 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1357 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1358
1359 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 RTHeapSimpleFree(hHeap, pvTest2);
1366 RTHeapSimpleFree(hHeap, pvTest1);
1367 }
1368 else
1369 {
1370 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1371 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1372 }
1373 if (RT_SUCCESS(rc))
1374#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1375 {
1376 /*
1377 * Add the chunk.
1378 *
1379 * This must be done before the unwind init so windows can allocate
1380 * memory from the chunk when using the alternative sub-allocator.
1381 */
1382 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1383#ifdef IN_RING3
1384 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1385#endif
1386#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1387 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1388#else
1389 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1390 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1391 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1392 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1393#endif
1394
1395 pExecMemAllocator->cChunks = idxChunk + 1;
1396 pExecMemAllocator->idxChunkHint = idxChunk;
1397
1398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1400 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1401#else
1402 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1403 pExecMemAllocator->cbTotal += cbFree;
1404 pExecMemAllocator->cbFree += cbFree;
1405#endif
1406
1407#ifdef IN_RING3
1408 /*
1409 * Initialize the unwind information (this cannot really fail atm).
1410 * (This sets pvUnwindInfo.)
1411 */
1412 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1413 if (RT_SUCCESS(rc))
1414#endif
1415 {
1416 return VINF_SUCCESS;
1417 }
1418
1419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1420 /* Just in case the impossible happens, undo the above: */
1421 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1422 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1423 pExecMemAllocator->cChunks = idxChunk;
1424 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1425 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1426 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1427 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1428#endif
1429 }
1430#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1431 }
1432#endif
1433 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1434 RT_NOREF(pVCpu);
1435 return rc;
1436}
1437
1438
1439/**
1440 * Initializes the executable memory allocator for native recompilation on the
1441 * calling EMT.
1442 *
1443 * @returns VBox status code.
1444 * @param pVCpu The cross context virtual CPU structure of the calling
1445 * thread.
1446 * @param cbMax The max size of the allocator.
1447 * @param cbInitial The initial allocator size.
1448 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1449 * dependent).
1450 */
1451int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1452{
1453 /*
1454 * Validate input.
1455 */
1456 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1457 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1458 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1459 || cbChunk == 0
1460 || ( RT_IS_POWER_OF_TWO(cbChunk)
1461 && cbChunk >= _1M
1462 && cbChunk <= _256M
1463 && cbChunk <= cbMax),
1464 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1465 VERR_OUT_OF_RANGE);
1466
1467 /*
1468 * Adjust/figure out the chunk size.
1469 */
1470 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1471 {
1472 if (cbMax >= _256M)
1473 cbChunk = _64M;
1474 else
1475 {
1476 if (cbMax < _16M)
1477 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1478 else
1479 cbChunk = (uint32_t)cbMax / 4;
1480 if (!RT_IS_POWER_OF_TWO(cbChunk))
1481 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1482 }
1483 }
1484
1485 if (cbChunk > cbMax)
1486 cbMax = cbChunk;
1487 else
1488 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1489 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1490 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1491
1492 /*
1493 * Allocate and initialize the allocator instance.
1494 */
1495 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1496#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1497 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1498 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1499 cbNeeded += cbBitmap * cMaxChunks;
1500 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1501 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1502#endif
1503#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1504 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1505 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1506#endif
1507 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1508 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1509 VERR_NO_MEMORY);
1510 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1511 pExecMemAllocator->cbChunk = cbChunk;
1512 pExecMemAllocator->cMaxChunks = cMaxChunks;
1513 pExecMemAllocator->cChunks = 0;
1514 pExecMemAllocator->idxChunkHint = 0;
1515 pExecMemAllocator->cAllocations = 0;
1516 pExecMemAllocator->cbTotal = 0;
1517 pExecMemAllocator->cbFree = 0;
1518 pExecMemAllocator->cbAllocated = 0;
1519#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1520 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1521 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1522 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1523 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1524#endif
1525#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1526 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1527#endif
1528 for (uint32_t i = 0; i < cMaxChunks; i++)
1529 {
1530#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1531 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1532 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1533#else
1534 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1535#endif
1536 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1537#ifdef IN_RING0
1538 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1539#else
1540 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1541#endif
1542 }
1543 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1544
1545 /*
1546 * Do the initial allocations.
1547 */
1548 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1549 {
1550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1551 AssertLogRelRCReturn(rc, rc);
1552 }
1553
1554 pExecMemAllocator->idxChunkHint = 0;
1555
1556 return VINF_SUCCESS;
1557}
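/*
 * Worked example of the chunk size selection in iemExecMemAllocatorInit
 * (illustrative only): for cbMax = 40 MiB the code takes the cbMax / 4 branch,
 * giving 10 MiB, which ASMBitLastSetU32 + RT_BIT_32 rounds up to the next
 * power of two, and cbMax is then rounded up to a whole number of chunks:
 *
 *      cbChunk    = RT_BIT_32(ASMBitLastSetU32(_1M * 10));          // 16 MiB
 *      cbMax      = (_1M * 40 - 1 + cbChunk) / cbChunk * cbChunk;   // 48 MiB
 *      cMaxChunks = (uint32_t)(cbMax / cbChunk);                    // 3
 */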
1558
1559
1560/*********************************************************************************************************************************
1561* Native Recompilation *
1562*********************************************************************************************************************************/
1563
1564
1565/**
1566 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1567 */
1568IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1569{
1570 pVCpu->iem.s.cInstructions += idxInstr;
1571 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1572}
1573
1574
1575/**
1576 * Used by TB code when it wants to raise a \#GP(0).
1577 */
1578IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1579{
1580#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1581 pVCpu->iem.s.idxTbCurInstr = idxInstr;
1582#else
1583 RT_NOREF(idxInstr);
1584#endif
1585 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1586#ifndef _MSC_VER
1587 return VINF_IEM_RAISED_XCPT; /* not reached */
1588#endif
1589}
1590
1591
1592/* Segmented memory helpers: */
1593
1594/**
1595 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1596 */
1597IEM_DECL_NATIVE_HLP_DEF(uint8_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1598{
1599 return iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1600}
1601
1602
1603/**
1604 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1605 */
1606IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1607{
1608 return iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1609}
1610
1611
1612/**
1613 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1614 */
1615IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1616{
1617 return iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1618}
1619
1620
1621/**
1622 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1623 */
1624IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1625{
1626 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1627}
1628
1629
1630/**
1631 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1632 */
1633IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1634{
1635 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1636}
1637
1638
1639/**
1640 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1643{
1644 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1645}
1646
1647
1648/**
1649 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1652{
1653 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1654}
1655
1656
1657/**
1658 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1661{
1662 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1663}
1664
1665
1666/* Flat memory helpers: */
1667
1668/**
1669 * Used by TB code to load unsigned 8-bit data w/ flat address.
1670 */
1671IEM_DECL_NATIVE_HLP_DEF(uint8_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1672{
1673 return iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1674}
1675
1676
1677/**
1678 * Used by TB code to load unsigned 16-bit data w/ flat address.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1681{
1682 return iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1683}
1684
1685
1686/**
1687 * Used by TB code to load unsigned 32-bit data w/ flat address.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1690{
1691 return iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1692}
1693
1694
1695/**
1696 * Used by TB code to load unsigned 64-bit data w/ flat address.
1697 */
1698IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1699{
1700 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1701}
1702
1703
1704/**
1705 * Used by TB code to store unsigned 8-bit data w/ flat address.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1708{
1709 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
1710}
1711
1712
1713/**
1714 * Used by TB code to store unsigned 16-bit data w/ flat address.
1715 */
1716IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1717{
1718 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
1719}
1720
1721
1722/**
1723 * Used by TB code to store unsigned 32-bit data w/ flat address.
1724 */
1725IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1726{
1727 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
1728}
1729
1730
1731/**
1732 * Used by TB code to store unsigned 64-bit data w/ flat address.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1735{
1736 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
1737}
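/*
 * Note that the flat helpers are the no-segmentation counterparts of the
 * segmented ones above; in 64-bit mode with a flat DS the following two calls
 * are expected to fetch the same dword (illustrative sketch only):
 *
 *      uint32_t u32A = iemNativeHlpMemFetchDataU32(pVCpu, GCPtrMem, X86_SREG_DS);
 *      uint32_t u32B = iemNativeHlpMemFlatFetchDataU32(pVCpu, GCPtrMem);
 */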
1738
1739
1740
1741
1742/**
1743 * Reinitializes the native recompiler state.
1744 *
1745 * Called before starting a new recompile job.
1746 */
1747static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1748{
1749 pReNative->cLabels = 0;
1750 pReNative->bmLabelTypes = 0;
1751 pReNative->cFixups = 0;
1752#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1753 pReNative->pDbgInfo->cEntries = 0;
1754#endif
1755 pReNative->pTbOrg = pTb;
1756 pReNative->cCondDepth = 0;
1757 pReNative->uCondSeqNo = 0;
1758 pReNative->uCheckIrqSeqNo = 0;
1759 pReNative->uTlbSeqNo = 0;
1760
1761 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1762#if IEMNATIVE_HST_GREG_COUNT < 32
1763 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1764#endif
1765 ;
1766 pReNative->Core.bmHstRegsWithGstShadow = 0;
1767 pReNative->Core.bmGstRegShadows = 0;
1768 pReNative->Core.bmVars = 0;
1769 pReNative->Core.bmStack = 0;
1770 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1771 pReNative->Core.u64ArgVars = UINT64_MAX;
1772
1773 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 6);
1774 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1775 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1776 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1777 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1778 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1779 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1780
1781 /* Full host register reinit: */
1782 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1783 {
1784 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1785 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1786 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1787 }
1788
1789 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1790 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1791#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1792 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1793#endif
1794#ifdef IEMNATIVE_REG_FIXED_TMP0
1795 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1796#endif
1797 );
1798 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1799 {
1800 fRegs &= ~RT_BIT_32(idxReg);
1801 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1802 }
1803
1804 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1805#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1806 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1807#endif
1808#ifdef IEMNATIVE_REG_FIXED_TMP0
1809 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1810#endif
1811 return pReNative;
1812}
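/*
 * For reference, the initial Core.bmHstRegs value computed in iemNativeReInit
 * works out as follows on a host with 16 general purpose registers (an
 * assumption; IEMNATIVE_HST_GREG_COUNT is host specific): the upper half of
 * the 32-bit bitmap is permanently marked allocated and the fixed registers
 * are marked via IEMNATIVE_REG_FIXED_MASK:
 *
 *      bmHstRegs = IEMNATIVE_REG_FIXED_MASK | UINT32_C(0xffff0000);
 */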
1813
1814
1815/**
1816 * Allocates and initializes the native recompiler state.
1817 *
1818 * This is called the first time an EMT wants to recompile something.
1819 *
1820 * @returns Pointer to the new recompiler state.
1821 * @param pVCpu The cross context virtual CPU structure of the calling
1822 * thread.
1823 * @param pTb The TB that's about to be recompiled.
1824 * @thread EMT(pVCpu)
1825 */
1826static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1827{
1828 VMCPU_ASSERT_EMT(pVCpu);
1829
1830 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1831 AssertReturn(pReNative, NULL);
1832
1833 /*
1834 * Try allocate all the buffers and stuff we need.
1835 */
1836 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1837 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1838 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1839#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1840 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1841#endif
1842 if (RT_LIKELY( pReNative->pInstrBuf
1843 && pReNative->paLabels
1844 && pReNative->paFixups)
1845#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1846 && pReNative->pDbgInfo
1847#endif
1848 )
1849 {
1850 /*
1851 * Set the buffer & array sizes on success.
1852 */
1853 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1854 pReNative->cLabelsAlloc = _8K;
1855 pReNative->cFixupsAlloc = _16K;
1856#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1857 pReNative->cDbgInfoAlloc = _16K;
1858#endif
1859
1860 /*
1861 * Done, just need to save it and reinit it.
1862 */
1863 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1864 return iemNativeReInit(pReNative, pTb);
1865 }
1866
1867 /*
1868 * Failed. Cleanup and return.
1869 */
1870 AssertFailed();
1871 RTMemFree(pReNative->pInstrBuf);
1872 RTMemFree(pReNative->paLabels);
1873 RTMemFree(pReNative->paFixups);
1874#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1875 RTMemFree(pReNative->pDbgInfo);
1876#endif
1877 RTMemFree(pReNative);
1878 return NULL;
1879}
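/*
 * Typical call pattern for the two functions above (sketch of how a caller is
 * expected to combine them; the surrounding recompile loop is not shown):
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (pReNative)
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *      {
 *          pReNative = iemNativeInit(pVCpu, pTb);
 *          AssertReturn(pReNative, pTb);
 *      }
 */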
1880
1881
1882/**
1883 * Creates a label
1884 *
1885 * If the label does not yet have a defined position,
1886 * call iemNativeLabelDefine() later to set it.
1887 *
1888 * @returns Label ID. Throws VBox status code on failure, so no need to check
1889 * the return value.
1890 * @param pReNative The native recompile state.
1891 * @param enmType The label type.
1892 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1893 * label is not yet defined (default).
1894 * @param uData Data associated with the label. Only applicable to
1895 * certain types of labels. Default is zero.
1896 */
1897DECL_HIDDEN_THROW(uint32_t)
1898iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1899 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1900{
1901 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
1902
1903 /*
1904 * Locate existing label definition.
1905 *
1906 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1907 * and uData is zero.
1908 */
1909 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1910 uint32_t const cLabels = pReNative->cLabels;
1911 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1912#ifndef VBOX_STRICT
1913 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
1914 && offWhere == UINT32_MAX
1915 && uData == 0
1916#endif
1917 )
1918 {
1919#ifndef VBOX_STRICT
1920 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
1921 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1922 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
1923 if (idxLabel < pReNative->cLabels)
1924 return idxLabel;
1925#else
1926 for (uint32_t i = 0; i < cLabels; i++)
1927 if ( paLabels[i].enmType == enmType
1928 && paLabels[i].uData == uData)
1929 {
1930 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1931 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1932 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1933 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
1934 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1935 return i;
1936 }
1937 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
1938 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1939#endif
1940 }
1941
1942 /*
1943 * Make sure we've got room for another label.
1944 */
1945 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1946 { /* likely */ }
1947 else
1948 {
1949 uint32_t cNew = pReNative->cLabelsAlloc;
1950 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1951 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1952 cNew *= 2;
1953 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
1954 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1955 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1956 pReNative->paLabels = paLabels;
1957 pReNative->cLabelsAlloc = cNew;
1958 }
1959
1960 /*
1961 * Define a new label.
1962 */
1963 paLabels[cLabels].off = offWhere;
1964 paLabels[cLabels].enmType = enmType;
1965 paLabels[cLabels].uData = uData;
1966 pReNative->cLabels = cLabels + 1;
1967
1968 Assert((unsigned)enmType < 64);
1969 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1970
1971 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
1972 {
1973 Assert(uData == 0);
1974 pReNative->aidxUniqueLabels[enmType] = cLabels;
1975 }
1976
1977 if (offWhere != UINT32_MAX)
1978 {
1979#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1980 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1981 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1982#endif
1983 }
1984 return cLabels;
1985}
1986
1987
1988/**
1989 * Defines the location of an existing label.
1990 *
1991 * @param pReNative The native recompile state.
1992 * @param idxLabel The label to define.
1993 * @param offWhere The position.
1994 */
1995DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1996{
1997 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1998 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1999 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2000 pLabel->off = offWhere;
2001#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2002 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2003 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2004#endif
2005}
2006
2007
2008/**
2009 * Looks up a label.
2010 *
2011 * @returns Label ID if found, UINT32_MAX if not.
2012 */
2013static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2014 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2015{
2016 Assert((unsigned)enmType < 64);
2017 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2018 {
2019 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2020 return pReNative->aidxUniqueLabels[enmType];
2021
2022 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2023 uint32_t const cLabels = pReNative->cLabels;
2024 for (uint32_t i = 0; i < cLabels; i++)
2025 if ( paLabels[i].enmType == enmType
2026 && paLabels[i].uData == uData
2027 && ( paLabels[i].off == offWhere
2028 || offWhere == UINT32_MAX
2029 || paLabels[i].off == UINT32_MAX))
2030 return i;
2031 }
2032 return UINT32_MAX;
2033}
2034
2035
2036/**
2037 * Adds a fixup.
2038 *
2039 * @throws VBox status code (int) on failure.
2040 * @param pReNative The native recompile state.
2041 * @param offWhere The instruction offset of the fixup location.
2042 * @param idxLabel The target label ID for the fixup.
2043 * @param enmType The fixup type.
2044 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2045 */
2046DECL_HIDDEN_THROW(void)
2047iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2048 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2049{
2050 Assert(idxLabel <= UINT16_MAX);
2051 Assert((unsigned)enmType <= UINT8_MAX);
2052
2053 /*
2054 * Make sure we've room.
2055 */
2056 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2057 uint32_t const cFixups = pReNative->cFixups;
2058 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2059 { /* likely */ }
2060 else
2061 {
2062 uint32_t cNew = pReNative->cFixupsAlloc;
2063 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2064 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2065 cNew *= 2;
2066 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2067 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2068 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2069 pReNative->paFixups = paFixups;
2070 pReNative->cFixupsAlloc = cNew;
2071 }
2072
2073 /*
2074 * Add the fixup.
2075 */
2076 paFixups[cFixups].off = offWhere;
2077 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2078 paFixups[cFixups].enmType = enmType;
2079 paFixups[cFixups].offAddend = offAddend;
2080 pReNative->cFixups = cFixups + 1;
2081}
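/*
 * Sketch of how labels and fixups work together (illustrative only; the label
 * type, fixup type and addend below are just examples of what a caller might
 * use):
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *      // ... emit a branch at 'off' that needs patching ...
 *      iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
 *      // ... later, once the target position is known:
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */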
2082
2083
2084/**
2085 * Slow code path for iemNativeInstrBufEnsure.
2086 */
2087DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2088{
2089 /* Double the buffer size till we meet the request. */
2090 uint32_t cNew = pReNative->cInstrBufAlloc;
2091 AssertReturn(cNew > 0, NULL);
2092 do
2093 cNew *= 2;
2094 while (cNew < off + cInstrReq);
2095
2096 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2097#ifdef RT_ARCH_ARM64
2098 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2099#else
2100 uint32_t const cbMaxInstrBuf = _2M;
2101#endif
2102 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2103
2104 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2105 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2106
2107 pReNative->cInstrBufAlloc = cNew;
2108 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2109}
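/*
 * The fast path wrapper for the slow function above lives in the recompiler
 * header; conceptually it looks roughly like this (sketch, not the actual
 * inline definition):
 *
 *      if (RT_LIKELY(off + cInstrReq <= pReNative->cInstrBufAlloc))
 *          return pReNative->pInstrBuf;
 *      return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
 */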
2110
2111#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2112
2113/**
2114 * Grows the static debug info array used during recompilation.
2115 *
2116 * @returns Pointer to the new debug info block; throws VBox status code on
2117 * failure, so no need to check the return value.
2118 */
2119DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2120{
2121 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2122 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2123 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2124 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2125 pReNative->pDbgInfo = pDbgInfo;
2126 pReNative->cDbgInfoAlloc = cNew;
2127 return pDbgInfo;
2128}
2129
2130
2131/**
2132 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2133 */
2134DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2135{
2136 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2137 { /* likely */ }
2138 else
2139 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2140 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2141}
2142
2143
2144/**
2145 * Debug Info: Adds a native offset record, if necessary.
2146 */
2147static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2148{
2149 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2150
2151 /*
2152 * Search backwards to see if we've got a similar record already.
2153 */
2154 uint32_t idx = pDbgInfo->cEntries;
2155 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2156 while (idx-- > idxStop)
2157 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2158 {
2159 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2160 return;
2161 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2162 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2163 break;
2164 }
2165
2166 /*
2167 * Add it.
2168 */
2169 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2170 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2171 pEntry->NativeOffset.offNative = off;
2172}
2173
2174
2175/**
2176 * Debug Info: Record info about a label.
2177 */
2178static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2179{
2180 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2181 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2182 pEntry->Label.uUnused = 0;
2183 pEntry->Label.enmLabel = (uint8_t)enmType;
2184 pEntry->Label.uData = uData;
2185}
2186
2187
2188/**
2189 * Debug Info: Record info about a threaded call.
2190 */
2191static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2192{
2193 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2194 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2195 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2196 pEntry->ThreadedCall.uUnused = 0;
2197 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2198}
2199
2200
2201/**
2202 * Debug Info: Record info about a new guest instruction.
2203 */
2204static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2205{
2206 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2207 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2208 pEntry->GuestInstruction.uUnused = 0;
2209 pEntry->GuestInstruction.fExec = fExec;
2210}
2211
2212
2213/**
2214 * Debug Info: Record info about guest register shadowing.
2215 */
2216static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2217 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2218{
2219 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2220 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2221 pEntry->GuestRegShadowing.uUnused = 0;
2222 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2223 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2224 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2225}
2226
2227#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2228
2229
2230/*********************************************************************************************************************************
2231* Register Allocator *
2232*********************************************************************************************************************************/
2233
2234/**
2235 * Register parameter indexes (indexed by argument number).
2236 */
2237DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2238{
2239 IEMNATIVE_CALL_ARG0_GREG,
2240 IEMNATIVE_CALL_ARG1_GREG,
2241 IEMNATIVE_CALL_ARG2_GREG,
2242 IEMNATIVE_CALL_ARG3_GREG,
2243#if defined(IEMNATIVE_CALL_ARG4_GREG)
2244 IEMNATIVE_CALL_ARG4_GREG,
2245# if defined(IEMNATIVE_CALL_ARG5_GREG)
2246 IEMNATIVE_CALL_ARG5_GREG,
2247# if defined(IEMNATIVE_CALL_ARG6_GREG)
2248 IEMNATIVE_CALL_ARG6_GREG,
2249# if defined(IEMNATIVE_CALL_ARG7_GREG)
2250 IEMNATIVE_CALL_ARG7_GREG,
2251# endif
2252# endif
2253# endif
2254#endif
2255};
2256
2257/**
2258 * Call register masks indexed by argument count.
2259 */
2260DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2261{
2262 0,
2263 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2264 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2265 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2266 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2267 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2268#if defined(IEMNATIVE_CALL_ARG4_GREG)
2269 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2270 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2271# if defined(IEMNATIVE_CALL_ARG5_GREG)
2272 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2273 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2274# if defined(IEMNATIVE_CALL_ARG6_GREG)
2275 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2276 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2277 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2278# if defined(IEMNATIVE_CALL_ARG7_GREG)
2279 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2280 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2281 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2282# endif
2283# endif
2284# endif
2285#endif
2286};
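/*
 * Example of what the masks above expand to, assuming an AMD64 host using the
 * SysV calling convention where ARG0..ARG2 map to rdi, rsi and rdx:
 *
 *      g_afIemNativeCallRegs[3] == RT_BIT_32(X86_GREG_xDI)
 *                                | RT_BIT_32(X86_GREG_xSI)
 *                                | RT_BIT_32(X86_GREG_xDX)
 */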
2287
2288#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2289/**
2290 * BP offset of the stack argument slots.
2291 *
2292 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2293 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2294 */
2295DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2296{
2297 IEMNATIVE_FP_OFF_STACK_ARG0,
2298# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2299 IEMNATIVE_FP_OFF_STACK_ARG1,
2300# endif
2301# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2302 IEMNATIVE_FP_OFF_STACK_ARG2,
2303# endif
2304# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2305 IEMNATIVE_FP_OFF_STACK_ARG3,
2306# endif
2307};
2308AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2309#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2310
2311/**
2312 * Info about shadowed guest register values.
2313 * @see IEMNATIVEGSTREG
2314 */
2315static struct
2316{
2317 /** Offset in VMCPU. */
2318 uint32_t off;
2319 /** The field size. */
2320 uint8_t cb;
2321 /** Name (for logging). */
2322 const char *pszName;
2323} const g_aGstShadowInfo[] =
2324{
2325#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2326 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2327 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2328 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2329 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2330 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2331 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2332 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2333 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2334 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2335 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2336 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2337 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2338 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2339 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2340 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2341 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2342 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2343 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2344 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2345 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2346 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2347 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2348 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2349 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2350 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2351 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2352 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2353 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2354 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2355 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2356 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2357 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2358 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2359 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2360 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2361 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2362#undef CPUMCTX_OFF_AND_SIZE
2363};
2364AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
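/*
 * The table above is consulted whenever a shadowed guest value must be loaded
 * from or compared against the CPU context; conceptually (sketch only, the
 * actual emitter helpers live in the recompiler emit header):
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[enmGstReg].off;   // byte offset into VMCPU
 *      uint8_t  const cbField = g_aGstShadowInfo[enmGstReg].cb;    // 2, 4 or 8 bytes
 *      // an 8 byte field would then be loaded with something along the lines of
 *      //     off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, offVCpu);
 */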
2365
2366
2367/** Host CPU general purpose register names. */
2368DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2369{
2370#ifdef RT_ARCH_AMD64
2371 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2372#elif RT_ARCH_ARM64
2373 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2374 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2375#else
2376# error "port me"
2377#endif
2378};
2379
2380
2381DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2382 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2383{
2384 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2385
2386 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2387 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2388 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2389 return (uint8_t)idxReg;
2390}
2391
2392
2393/**
2394 * Tries to locate a suitable register in the given register mask.
2395 *
2396 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2397 * failed.
2398 *
2399 * @returns Host register number on success, returns UINT8_MAX on failure.
2400 */
2401static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2402{
2403 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2404 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2405 if (fRegs)
2406 {
2407 /** @todo pick better here: */
2408 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2409
2410 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2411 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2412 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2413 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2414
2415 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2416 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2417 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2418 return idxReg;
2419 }
2420 return UINT8_MAX;
2421}
2422
2423
2424/**
2425 * Locate a register, possibly freeing one up.
2426 *
2427 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2428 * failed.
2429 *
2430 * @returns Host register number on success. Returns UINT8_MAX if no register is
2431 * found; the caller is supposed to deal with this and raise an
2432 * allocation type specific status code (if desired).
2433 *
2434 * @throws VBox status code if we run into trouble spilling a variable or
2435 * recording debug info. Does NOT throw anything if we're out of
2436 * registers, though.
2437 */
2438static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2439 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2440{
2441 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2442 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2443
2444 /*
2445 * Try a freed register that's shadowing a guest register
2446 */
2447 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2448 if (fRegs)
2449 {
2450 unsigned const idxReg = (fPreferVolatile
2451 ? ASMBitFirstSetU32(fRegs)
2452 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2453 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2454 - 1;
2455
2456 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2457 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2458 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2459 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2460
2461 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2462 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2463 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2464 return idxReg;
2465 }
2466
2467 /*
2468 * Try free up a variable that's in a register.
2469 *
2470 * We do two rounds here: the first evacuates variables that don't need to be
2471 * saved on the stack, the second round moves things to the stack.
2472 */
2473 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2474 {
2475 uint32_t fVars = pReNative->Core.bmVars;
2476 while (fVars)
2477 {
2478 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2479 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2480/** @todo Prevent active variables from changing here... */
2481 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2482 && (RT_BIT_32(idxReg) & fRegMask)
2483 && ( iLoop == 0
2484 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2485 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2486 {
2487 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2488 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2489 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2490 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2491 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2492 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2493
2494 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2495 {
2496 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
2497 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2498 }
2499
2500 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2501 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2502 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2503 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2504 return idxReg;
2505 }
2506 fVars &= ~RT_BIT_32(idxVar);
2507 }
2508 }
2509
2510 return UINT8_MAX;
2511}
2512
2513
2514/**
2515 * Reassigns a variable to a different register specified by the caller.
2516 *
2517 * @returns The new code buffer position.
2518 * @param pReNative The native recompile state.
2519 * @param off The current code buffer position.
2520 * @param idxVar The variable index.
2521 * @param idxRegOld The old host register number.
2522 * @param idxRegNew The new host register number.
2523 * @param pszCaller The caller for logging.
2524 */
2525static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2526 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
2527{
2528 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
2529 RT_NOREF(pszCaller);
2530
2531 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2532
2533 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2534 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
2535 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2536 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2537
2538 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2539 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2540 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2541 if (fGstRegShadows)
2542 {
2543 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2544 while (fGstRegShadows)
2545 {
2546 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2547 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2548
2549 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2550 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2551 }
2552 }
2553
2554 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2555 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2556 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
2557 return off;
2558}
2559
2560
2561/**
2562 * Moves a variable to a different register or spills it onto the stack.
2563 *
2564 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2565 * kinds can easily be recreated if needed later.
2566 *
2567 * @returns The new code buffer position.
2568 * @param pReNative The native recompile state.
2569 * @param off The current code buffer position.
2570 * @param idxVar The variable index.
2571 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2572 * call-volatile registers.
2573 */
2574static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2575 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2576{
2577 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2578 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2579
2580 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2581 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2582 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2583 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2584 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2585 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2586 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2587 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2588 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2589
2590
2591 /** @todo Add statistics on this.*/
2592 /** @todo Implement basic variable liveness analysis (python) so variables
2593 * can be freed immediately once no longer used. This has the potential to
2594 * be trashing registers and stack for dead variables. */
2595
2596 /*
2597 * First try move it to a different register, as that's cheaper.
2598 */
2599 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2600 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2601 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2602 if (fRegs)
2603 {
2604 /* Avoid using shadow registers, if possible. */
2605 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2606 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2607 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2608 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
2609 }
2610
2611 /*
2612 * Otherwise we must spill the register onto the stack.
2613 */
2614 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
2615 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
2616 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
2617 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
2618
2619 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2620 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2621 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2622 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2623 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2624 return off;
2625}
2626
2627
2628/**
2629 * Allocates a temporary host general purpose register.
2630 *
2631 * This may emit code to save register content onto the stack in order to free
2632 * up a register.
2633 *
2634 * @returns The host register number; throws VBox status code on failure,
2635 * so no need to check the return value.
2636 * @param pReNative The native recompile state.
2637 * @param poff Pointer to the variable with the code buffer position.
2638 * This will be updated if we need to move a variable from
2639 * register to stack in order to satisfy the request.
2640 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2641 * registers (@c true, default) or the other way around
2642 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2643 */
2644DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2645{
2646 /*
2647 * Try find a completely unused register, preferably a call-volatile one.
2648 */
2649 uint8_t idxReg;
2650 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2651 & ~pReNative->Core.bmHstRegsWithGstShadow
2652 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2653 if (fRegs)
2654 {
2655 if (fPreferVolatile)
2656 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2657 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2658 else
2659 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2660 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2661 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2662 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2663 }
2664 else
2665 {
2666 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2667 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2668 }
2669 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2670}
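/*
 * Usual pattern for a scratch register (sketch; iemNativeRegFreeTmp is the
 * freeing counterpart provided elsewhere in the recompiler):
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      // ... emit instructions that clobber idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */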
2671
2672
2673/**
2674 * Allocates a temporary register for loading an immediate value into.
2675 *
2676 * This will emit code to load the immediate, unless there happens to be an
2677 * unused register with the value already loaded.
2678 *
2679 * The caller will not modify the returned register, it must be considered
2680 * read-only. Free using iemNativeRegFreeTmpImm.
2681 *
2682 * @returns The host register number; throws VBox status code on failure, so no
2683 * need to check the return value.
2684 * @param pReNative The native recompile state.
2685 * @param poff Pointer to the variable with the code buffer position.
2686 * @param uImm The immediate value that the register must hold upon
2687 * return.
2688 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2689 * registers (@c true, default) or the other way around
2690 * (@c false).
2691 *
2692 * @note Reusing immediate values has not been implemented yet.
2693 */
2694DECL_HIDDEN_THROW(uint8_t)
2695iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2696{
2697 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2698 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2699 return idxReg;
2700}
2701
2702
2703/**
2704 * Marks host register @a idxHstReg as containing a shadow copy of guest
2705 * register @a enmGstReg.
2706 *
2707 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2708 * host register before calling.
2709 */
2710DECL_FORCE_INLINE(void)
2711iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2712{
2713 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2714 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2715
2716 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2717 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2718 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2719 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2720#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2721 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2722 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2723#else
2724 RT_NOREF(off);
2725#endif
2726}
2727
2728
2729/**
2730 * Clear any guest register shadow claims from @a idxHstReg.
2731 *
2732 * The register does not need to be shadowing any guest registers.
2733 */
2734DECL_FORCE_INLINE(void)
2735iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2736{
2737 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2738 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
2739 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2740 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2741 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2742
2743#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2744 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2745 if (fGstRegs)
2746 {
2747 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
2748 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2749 while (fGstRegs)
2750 {
2751 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2752 fGstRegs &= ~RT_BIT_64(iGstReg);
2753 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2754 }
2755 }
2756#else
2757 RT_NOREF(off);
2758#endif
2759
2760 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2761 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2762 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2763}
2764
2765
2766/**
2767 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
2768 * and global overview flags.
2769 */
2770DECL_FORCE_INLINE(void)
2771iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2772{
2773 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2774 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2775 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
2776 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2777 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
2778 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2779 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2780
2781#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2782 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2783 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
2784#else
2785 RT_NOREF(off);
2786#endif
2787
2788 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
2789 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
2790 if (!fGstRegShadowsNew)
2791 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2792 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
2793}
2794
2795
2796/**
2797 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2798 * to @a idxRegTo.
2799 */
2800DECL_FORCE_INLINE(void)
2801iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2802 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2803{
2804 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2805 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2806 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
2807 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2808 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
2809 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
2810 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2811 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2812
2813 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
2814 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
2815 if (!fGstRegShadowsFrom)
2816 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
2817 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
2818 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
2819 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2820#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2821 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2822 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2823#else
2824 RT_NOREF(off);
2825#endif
2826}
2827
2828
2829/**
2830 * Allocates a temporary host general purpose register for keeping a guest
2831 * register value.
2832 *
2833 * Since we may already have a register holding the guest register value,
2834 * code will only be emitted to do the loading if that's not the case. Code may
2835 * also be emitted if we have to free up a register to satisfy the request.
2836 *
2837 * @returns The host register number; throws VBox status code on failure, so no
2838 * need to check the return value.
2839 * @param pReNative The native recompile state.
2840 * @param poff Pointer to the variable with the code buffer
2841 * position. This will be updated if we need to move a
2842 * variable from register to stack in order to satisfy
2843 * the request.
2844 * @param enmGstReg The guest register that is to be updated.
2845 * @param enmIntendedUse How the caller will be using the host register.
2846 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2847 */
2848DECL_HIDDEN_THROW(uint8_t)
2849iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2850 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2851{
2852 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2853#ifdef LOG_ENABLED
2854 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
2855#endif
2856
2857 /*
2858 * First check if the guest register value is already in a host register.
2859 */
2860 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2861 {
2862 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2863 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2864 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2865 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2866
2867 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2868 {
2869 /*
2870 * If the register will trash the guest shadow copy, try find a
2871 * completely unused register we can use instead. If that fails,
2872 * we need to disassociate the host reg from the guest reg.
2873 */
2874 /** @todo would be nice to know if preserving the register is in any way helpful. */
2875 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2876 && ( ~pReNative->Core.bmHstRegs
2877 & ~pReNative->Core.bmHstRegsWithGstShadow
2878 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2879 {
2880 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2881
2882 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2883
2884 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2885 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2886 g_apszIemNativeHstRegNames[idxRegNew]));
2887 idxReg = idxRegNew;
2888 }
2889 else
2890 {
2891 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2892 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2893 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2894 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2895 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2896 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2897 else
2898 {
2899 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2900 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2901 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2902 }
2903 }
2904 }
2905 else
2906 {
2907 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
2908 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
2909 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
2910 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
2911
2912 /*
2913 * Allocate a new register, copy the value and, if updating, the
2914 * guest shadow copy assignment to the new register.
2915 */
2916 /** @todo share register for readonly access. */
2917 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2918
2919 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2920 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2921
2922 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
2923 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2924 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2925 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2926 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2927 else
2928 {
2929 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2930 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
2931 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2932 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2933 }
2934 idxReg = idxRegNew;
2935 }
2936
2937#ifdef VBOX_STRICT
2938 /* Strict builds: Check that the value is correct. */
2939 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2940#endif
2941
2942 return idxReg;
2943 }
2944
2945 /*
2946 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
2947 */
2948 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2949
2950 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2951 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2952
2953 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2954 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2955 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2956 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2957
2958 return idxRegNew;
2959}
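/*
 * A minimal usage sketch for iemNativeRegAllocTmpForGuestReg; the wrapper name and the
 * elided emitter step are hypothetical, only the allocator/free calls are taken from the
 * code above.
 */
#if 0
static uint32_t iemNativeExampleModifyGstGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
{
    /* Get a host register shadowing the guest GPR; loading code may be emitted, which is
       why the code buffer offset is passed by pointer. */
    uint8_t const idxHstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                              (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
                                                              kIemNativeGstRegUse_ForUpdate);
    /* ... emit code that updates idxHstReg and writes the result back to CPUMCTX ... */

    /* Release the temporary; any guest shadow association is left in place. */
    iemNativeRegFreeTmp(pReNative, idxHstReg);
    return off;
}
#endif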
2960
2961
2962/**
2963 * Allocates a temporary host general purpose register that already holds the
2964 * given guest register value.
2965 *
2966 * The use case for this function is places where the shadowing state cannot be
2967 * modified due to branching and such. This will fail if we don't have a
2968 * current shadow copy handy or if it's incompatible. The only code that will
2969 * be emitted here is value checking code in strict builds.
2970 *
2971 * The intended use can only be readonly!
2972 *
2973 * @returns The host register number, UINT8_MAX if not present.
2974 * @param pReNative The native recompile state.
2975 * @param poff Pointer to the instruction buffer offset.
2976 * Will be updated in strict builds if a register is
2977 * found.
2978 * @param enmGstReg The guest register that is to be fetched.
2979 * @note In strict builds, this may throw instruction buffer growth failures.
2980 * Non-strict builds will not throw anything.
2981 * @sa iemNativeRegAllocTmpForGuestReg
2982 */
2983DECL_HIDDEN_THROW(uint8_t)
2984iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2985{
2986 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2987
2988 /*
2989 * First check if the guest register value is already in a host register.
2990 */
2991 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2992 {
2993 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2994 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2995 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2996 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2997
2998 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2999 {
3000 /*
3001 * We only do readonly use here, so easy compared to the other
3002 * variant of this code.
3003 */
3004 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3005 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3006 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3007 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3008 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3009
3010#ifdef VBOX_STRICT
3011 /* Strict builds: Check that the value is correct. */
3012 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3013#else
3014 RT_NOREF(poff);
3015#endif
3016 return idxReg;
3017 }
3018 }
3019
3020 return UINT8_MAX;
3021}
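/*
 * A minimal usage sketch (hypothetical surrounding code): try the cheap lookup first and
 * only emit the slower generic path when no shadow copy of the guest register is handy.
 */
#if 0
    uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
    if (idxEflReg != UINT8_MAX)
    {
        /* ... emit code reading the cached EFLAGS value from idxEflReg (read-only!) ... */
        iemNativeRegFreeTmp(pReNative, idxEflReg);
    }
    else
    {
        /* ... emit code that fetches EFLAGS from CPUMCTX instead ... */
    }
#endif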
3022
3023
3024DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
3025
3026
3027/**
3028 * Allocates argument registers for a function call.
3029 *
3030 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3031 * need to check the return value.
3032 * @param pReNative The native recompile state.
3033 * @param off The current code buffer offset.
3034 * @param cArgs The number of arguments the function call takes.
3035 */
3036DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3037{
3038 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3039 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3040 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3041 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3042
3043 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3044 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3045 else if (cArgs == 0)
3046 return off;
3047
3048 /*
3049 * Do we get lucky and all registers are free and not shadowing anything?
3050 */
3051 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3052 for (uint32_t i = 0; i < cArgs; i++)
3053 {
3054 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3055 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3056 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3057 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3058 }
3059 /*
3060 * Okay, not lucky so we have to free up the registers.
3061 */
3062 else
3063 for (uint32_t i = 0; i < cArgs; i++)
3064 {
3065 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3066 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3067 {
3068 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3069 {
3070 case kIemNativeWhat_Var:
3071 {
3072 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3073 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
3074 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3075 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3076 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3077
3078 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3079 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3080 else
3081 {
3082 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3083 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3084 }
3085 break;
3086 }
3087
3088 case kIemNativeWhat_Tmp:
3089 case kIemNativeWhat_Arg:
3090 case kIemNativeWhat_rc:
3091 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3092 default:
3093 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3094 }
3095
3096 }
3097 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3098 {
3099 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3100 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3101 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3102 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3103 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3104 }
3105 else
3106 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3107 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3108 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3109 }
3110 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3111 return off;
3112}
3113
3114
3115DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3116
3117
3118#if 0
3119/**
3120 * Frees a register assignment of any type.
3121 *
3122 * @param pReNative The native recompile state.
3123 * @param idxHstReg The register to free.
3124 *
3125 * @note Does not update variables.
3126 */
3127DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3128{
3129 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3130 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3131 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3132 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3133 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3134 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3135 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3136 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3137 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3138 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3139 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3140 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3141 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3142 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3143
3144 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3145 /* no flushing, right:
3146 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3147 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3148 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3149 */
3150}
3151#endif
3152
3153
3154/**
3155 * Frees a temporary register.
3156 *
3157 * Any shadow copies of guest registers assigned to the host register will not
3158 * be flushed by this operation.
3159 */
3160DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3161{
3162 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3163 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3164 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3165 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3166 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3167}
3168
3169
3170/**
3171 * Frees a temporary immediate register.
3172 *
3173 * It is assumed that the call has not modified the register, so it still holds
3174 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3175 */
3176DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3177{
3178 iemNativeRegFreeTmp(pReNative, idxHstReg);
3179}
3180
3181
3182/**
3183 * Called right before emitting a call instruction to move anything important
3184 * out of call-volatile registers, free and flush the call-volatile registers,
3185 * optionally freeing argument variables.
3186 *
3187 * @returns New code buffer offset, UINT32_MAX on failure.
3188 * @param pReNative The native recompile state.
3189 * @param off The code buffer offset.
3190 * @param cArgs The number of arguments the function call takes.
3191 * It is presumed that the host register part of these has
3192 * been allocated as such already and won't need moving,
3193 * just freeing.
3194 */
3195DECL_HIDDEN_THROW(uint32_t)
3196iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3197{
3198 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
3199
3200 /*
3201 * Move anything important out of volatile registers.
3202 */
3203 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3204 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3205 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
3206#ifdef IEMNATIVE_REG_FIXED_TMP0
3207 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3208#endif
3209 & ~g_afIemNativeCallRegs[cArgs];
3210
3211 fRegsToMove &= pReNative->Core.bmHstRegs;
3212 if (!fRegsToMove)
3213 { /* likely */ }
3214 else
3215 {
3216 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
3217 while (fRegsToMove != 0)
3218 {
3219 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
3220 fRegsToMove &= ~RT_BIT_32(idxReg);
3221
3222 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3223 {
3224 case kIemNativeWhat_Var:
3225 {
3226 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3227 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
3228 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3229 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3230 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
3231 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
3232 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3233 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3234 else
3235 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3236 continue;
3237 }
3238
3239 case kIemNativeWhat_Arg:
3240 AssertMsgFailed(("What?!?: %u\n", idxReg));
3241 continue;
3242
3243 case kIemNativeWhat_rc:
3244 case kIemNativeWhat_Tmp:
3245 AssertMsgFailed(("Missing free: %u\n", idxReg));
3246 continue;
3247
3248 case kIemNativeWhat_FixedTmp:
3249 case kIemNativeWhat_pVCpuFixed:
3250 case kIemNativeWhat_pCtxFixed:
3251 case kIemNativeWhat_FixedReserved:
3252 case kIemNativeWhat_Invalid:
3253 case kIemNativeWhat_End:
3254 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3255 }
3256 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3257 }
3258 }
3259
3260 /*
3261 * Do the actual freeing.
3262 */
3263 if (pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3264 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n", pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3265 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3266
3267 /* If there are guest register shadows in any call-volatile register, we
3268 have to clear the corresponding guest register masks for each register. */
3269 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3270 if (fHstRegsWithGstShadow)
3271 {
3272 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3273 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
3274 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3275 do
3276 {
3277 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3278 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3279
3280 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
3281 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3282 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3283 } while (fHstRegsWithGstShadow != 0);
3284 }
3285
3286 return off;
3287}
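/*
 * A minimal sketch of the calling pattern this helper belongs to (compare
 * iemNativeEmitThreadedCall and iemNativeEmitCImplCall further down); pfnSomeHelper,
 * uSomeArg and idxInstr are hypothetical placeholders.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uSomeArg);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
    off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
#endif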
3288
3289
3290/**
3291 * Flushes a set of guest register shadow copies.
3292 *
3293 * This is usually done after calling a threaded function or a C-implementation
3294 * of an instruction.
3295 *
3296 * @param pReNative The native recompile state.
3297 * @param fGstRegs Set of guest registers to flush.
3298 */
3299DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3300{
3301 /*
3302 * Reduce the mask by what's currently shadowed
3303 */
3304 fGstRegs &= pReNative->Core.bmGstRegShadows;
3305 if (fGstRegs)
3306 {
3307 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n",
3308 fGstRegs, pReNative->Core.bmGstRegShadows, pReNative->Core.bmGstRegShadows & ~fGstRegs));
3309 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3310 if (pReNative->Core.bmGstRegShadows)
3311 {
3312 /*
3313 * Partial.
3314 */
3315 do
3316 {
3317 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3318 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3319 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3320 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3321 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3322
3323 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3324 fGstRegs &= ~fInThisHstReg;
3325 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3326 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3327 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3328 } while (fGstRegs != 0);
3329 }
3330 else
3331 {
3332 /*
3333 * Clear all.
3334 */
3335 do
3336 {
3337 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3338 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3339 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3340 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3341 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3342
3343 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3344 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3345 } while (fGstRegs != 0);
3346 pReNative->Core.bmHstRegsWithGstShadow = 0;
3347 }
3348 }
3349}
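/*
 * A one-line usage sketch: flush just the PC and EFLAGS shadow copies, the same two
 * registers the CImpl call path below always adds to its flush mask.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
#endif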
3350
3351
3352/**
3353 * Flushes delayed write of a specific guest register.
3354 *
3355 * This must be called prior to calling CImpl functions and any helpers that use
3356 * the guest state (like raising exceptions) and such.
3357 *
3358 * This optimization has not yet been implemented. The first target would be
3359 * RIP updates, since these are the most common ones.
3360 */
3361DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3362 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
3363{
3364 RT_NOREF(pReNative, enmClass, idxReg);
3365 return off;
3366}
3367
3368
3369/**
3370 * Flushes any delayed guest register writes.
3371 *
3372 * This must be called prior to calling CImpl functions and any helpers that use
3373 * the guest state (like raising exceptions) and such.
3374 *
3375 * This optimization has not yet been implemented. The first target would be
3376 * RIP updates, since these are the most common ones.
3377 */
3378DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3379{
3380 RT_NOREF(pReNative, off);
3381 return off;
3382}
3383
3384
3385#ifdef VBOX_STRICT
3386/**
3387 * Does internal register allocator sanity checks.
3388 */
3389static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
3390{
3391 /*
3392 * Iterate host registers building a guest shadowing set.
3393 */
3394 uint64_t bmGstRegShadows = 0;
3395 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
3396 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
3397 while (bmHstRegsWithGstShadow)
3398 {
3399 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
3400 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3401 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3402
3403 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3404 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
3405 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
3406 bmGstRegShadows |= fThisGstRegShadows;
3407 while (fThisGstRegShadows)
3408 {
3409 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
3410 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
3411 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
3412 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
3413 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
3414 }
3415 }
3416 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
3417 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
3418 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
3419
3420 /*
3421 * Now the other way around, checking the guest to host index array.
3422 */
3423 bmHstRegsWithGstShadow = 0;
3424 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
3425 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3426 while (bmGstRegShadows)
3427 {
3428 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
3429 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3430 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
3431
3432 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3433 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
3434 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
3435 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
3436 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3437 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3438 }
3439 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
3440 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
3441 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
3442}
3443#endif
3444
3445
3446/*********************************************************************************************************************************
3447* Code Emitters (larger snippets) *
3448*********************************************************************************************************************************/
3449
3450/**
3451 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3452 * extending to 64-bit width.
3453 *
3454 * @returns New code buffer offset on success, UINT32_MAX on failure.
3455 * @param pReNative The native recompile state.
3456 * @param off The current code buffer position.
3457 * @param idxHstReg The host register to load the guest register value into.
3458 * @param enmGstReg The guest register to load.
3459 *
3460 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3461 * that is something the caller needs to do if applicable.
3462 */
3463DECL_HIDDEN_THROW(uint32_t)
3464iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3465{
3466 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3467 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3468
3469 switch (g_aGstShadowInfo[enmGstReg].cb)
3470 {
3471 case sizeof(uint64_t):
3472 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3473 case sizeof(uint32_t):
3474 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3475 case sizeof(uint16_t):
3476 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3477#if 0 /* not present in the table. */
3478 case sizeof(uint8_t):
3479 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3480#endif
3481 default:
3482 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3483 }
3484}
3485
3486
3487#ifdef VBOX_STRICT
3488/**
3489 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
3490 *
3491 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3492 * Trashes EFLAGS on AMD64.
3493 */
3494static uint32_t
3495iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
3496{
3497# ifdef RT_ARCH_AMD64
3498 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3499
3500 /* rol reg64, 32 */
3501 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3502 pbCodeBuf[off++] = 0xc1;
3503 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3504 pbCodeBuf[off++] = 32;
3505
3506 /* test reg32, ffffffffh */
3507 if (idxReg >= 8)
3508 pbCodeBuf[off++] = X86_OP_REX_B;
3509 pbCodeBuf[off++] = 0xf7;
3510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3511 pbCodeBuf[off++] = 0xff;
3512 pbCodeBuf[off++] = 0xff;
3513 pbCodeBuf[off++] = 0xff;
3514 pbCodeBuf[off++] = 0xff;
3515
3516 /* je/jz +1 */
3517 pbCodeBuf[off++] = 0x74;
3518 pbCodeBuf[off++] = 0x01;
3519
3520 /* int3 */
3521 pbCodeBuf[off++] = 0xcc;
3522
3523 /* rol reg64, 32 */
3524 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3525 pbCodeBuf[off++] = 0xc1;
3526 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3527 pbCodeBuf[off++] = 32;
3528
3529# elif defined(RT_ARCH_ARM64)
3530 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3531 /* lsr tmp0, reg64, #32 */
3532 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
3533 /* cbz tmp0, +1 */
3534 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3535 /* brk #0x1100 */
3536 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
3537
3538# else
3539# error "Port me!"
3540# endif
3541 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3542 return off;
3543}
3544#endif /* VBOX_STRICT */
3545
3546
3547#ifdef VBOX_STRICT
3548/**
3549 * Emitting code that checks that the content of register @a idxReg is the same
3550 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
3551 * instruction if that's not the case.
3552 *
3553 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3554 * Trashes EFLAGS on AMD64.
3555 */
3556static uint32_t
3557iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3558{
3559# ifdef RT_ARCH_AMD64
3560 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3561
3562 /* cmp reg, [mem] */
3563 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3564 {
3565 if (idxReg >= 8)
3566 pbCodeBuf[off++] = X86_OP_REX_R;
3567 pbCodeBuf[off++] = 0x38;
3568 }
3569 else
3570 {
3571 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3572 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3573 else
3574 {
3575 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3576 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3577 else
3578 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3579 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3580 if (idxReg >= 8)
3581 pbCodeBuf[off++] = X86_OP_REX_R;
3582 }
3583 pbCodeBuf[off++] = 0x39;
3584 }
3585 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3586
3587 /* je/jz +1 */
3588 pbCodeBuf[off++] = 0x74;
3589 pbCodeBuf[off++] = 0x01;
3590
3591 /* int3 */
3592 pbCodeBuf[off++] = 0xcc;
3593
3594 /* For values smaller than the register size, we must check that the rest
3595 of the register is all zeros. */
3596 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3597 {
3598 /* test reg64, imm32 */
3599 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3600 pbCodeBuf[off++] = 0xf7;
3601 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3602 pbCodeBuf[off++] = 0;
3603 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3604 pbCodeBuf[off++] = 0xff;
3605 pbCodeBuf[off++] = 0xff;
3606
3607 /* je/jz +1 */
3608 pbCodeBuf[off++] = 0x74;
3609 pbCodeBuf[off++] = 0x01;
3610
3611 /* int3 */
3612 pbCodeBuf[off++] = 0xcc;
3613 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3614 }
3615 else
3616 {
3617 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3618 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3619 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
3620 }
3621
3622# elif defined(RT_ARCH_ARM64)
3623 /* mov TMP0, [gstreg] */
3624 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3625
3626 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3627 /* sub tmp0, tmp0, idxReg */
3628 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3629 /* cbz tmp0, +1 */
3630 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3631 /* brk #0x1000+enmGstReg */
3632 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3633 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3634
3635# else
3636# error "Port me!"
3637# endif
3638 return off;
3639}
3640#endif /* VBOX_STRICT */
3641
3642
3643#ifdef VBOX_STRICT
3644/**
3645 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
3646 * important bits.
3647 *
3648 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3649 * Trashes EFLAGS on AMD64.
3650 */
3651static uint32_t
3652iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
3653{
3654 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3655 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
3656 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
3657 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
3658
3659# ifdef RT_ARCH_AMD64
3660 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3661
3662 /* je/jz +1 */
3663 pbCodeBuf[off++] = 0x74;
3664 pbCodeBuf[off++] = 0x01;
3665
3666 /* int3 */
3667 pbCodeBuf[off++] = 0xcc;
3668
3669# elif defined(RT_ARCH_ARM64)
3670 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3671
3672 /* b.eq +1 */
3673 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
3674 /* brk #0x2000 */
3675 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
3676
3677# else
3678# error "Port me!"
3679# endif
3680 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3681
3682 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3683 return off;
3684}
3685#endif /* VBOX_STRICT */
3686
3687
3688/**
3689 * Emits code for checking the return code of a call and rcPassUp, returning
3690 * from the code if either is non-zero.
3691 */
3692DECL_HIDDEN_THROW(uint32_t)
3693iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3694{
3695#ifdef RT_ARCH_AMD64
3696 /*
3697 * AMD64: eax = call status code.
3698 */
3699
3700 /* edx = rcPassUp */
3701 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3702# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3703 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3704# endif
3705
3706 /* edx = eax | rcPassUp */
3707 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3708 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3709 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3710 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3711
3712 /* Jump to non-zero status return path. */
3713 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3714
3715 /* done. */
3716
3717#elif RT_ARCH_ARM64
3718 /*
3719 * ARM64: w0 = call status code.
3720 */
3721# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3722 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
3723# endif
3724 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3725
3726 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3727
3728 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3729
3730 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3731 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3732 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3733
3734#else
3735# error "port me"
3736#endif
3737 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3738 return off;
3739}
3740
3741
3742/**
3743 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3744 * raising a \#GP(0) if it isn't.
3745 *
3746 * @returns New code buffer offset, UINT32_MAX on failure.
3747 * @param pReNative The native recompile state.
3748 * @param off The code buffer offset.
3749 * @param idxAddrReg The host register with the address to check.
3750 * @param idxInstr The current instruction.
3751 */
3752DECL_HIDDEN_THROW(uint32_t)
3753iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3754{
3755 RT_NOREF(idxInstr);
3756
3757 /*
3758 * Make sure we don't have any outstanding guest register writes as we may
3759 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3760 */
3761 off = iemNativeRegFlushPendingWrites(pReNative, off);
3762
3763#ifdef RT_ARCH_AMD64
3764 /*
3765 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3766 * return raisexcpt();
3767 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3768 */
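    /* Worked example of the expression above: the high dword of a canonical address is
       either 0x00000000..0x00007fff (bit 47 clear) or 0xffff8000..0xffffffff (bit 47 set).
       Adding 0x8000 in 32-bit arithmetic maps those ranges to 0x8000..0xffff and, via
       wrap-around, 0x0000..0x7fff, so the subsequent >> 16 yields zero.  Any other high
       dword, e.g. 0x00008000, gives 0x10000 >> 16 = 1 and the exception path is taken. */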
3769 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3770
3771 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3772 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3773 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3774 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3775
3776# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3777 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3778# else
3779 uint32_t const offFixup = off;
3780 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3781 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3782 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3783 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3784# endif
3785
3786 iemNativeRegFreeTmp(pReNative, iTmpReg);
3787
3788#elif defined(RT_ARCH_ARM64)
3789 /*
3790 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3791 * return raisexcpt();
3792 * ----
3793 * mov x1, 0x800000000000
3794 * add x1, x0, x1
3795 * cmp xzr, x1, lsr 48
3796 * and either:
3797 * b.ne .Lraisexcpt
3798 * or:
3799 * b.eq .Lnoexcept
3800 * movz x1, #instruction-number
3801 * b .Lraisexcpt
3802 * .Lnoexcept:
3803 */
3804 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3805
3806 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3807 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3808 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3809
3810# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3811 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3812# else
3813 uint32_t const offFixup = off;
3814 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3815 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3816 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3817 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3818# endif
3819
3820 iemNativeRegFreeTmp(pReNative, iTmpReg);
3821
3822#else
3823# error "Port me"
3824#endif
3825 return off;
3826}
3827
3828
3829/**
3830 * Emits code to check if the content of @a idxAddrReg is within the limit of
3831 * idxSegReg, raising a \#GP(0) if it isn't.
3832 *
3833 * @returns New code buffer offset; throws VBox status code on error.
3834 * @param pReNative The native recompile state.
3835 * @param off The code buffer offset.
3836 * @param idxAddrReg The host register (32-bit) with the address to
3837 * check.
3838 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3839 * against.
3840 * @param idxInstr The current instruction.
3841 */
3842DECL_HIDDEN_THROW(uint32_t)
3843iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3844 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3845{
3846 /*
3847 * Make sure we don't have any outstanding guest register writes as we may
3848 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3849 */
3850 off = iemNativeRegFlushPendingWrites(pReNative, off);
3851
3852 /** @todo implement expand down/whatnot checking */
3853 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3854
3855 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3856 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3857 kIemNativeGstRegUse_ForUpdate);
3858
3859 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3860
3861#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3862 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3863 RT_NOREF(idxInstr);
3864#else
3865 uint32_t const offFixup = off;
3866 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3867 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3868 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3869 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3870#endif
3871
3872 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3873 return off;
3874}
3875
3876
3877/**
3878 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
3879 *
3880 * @returns The flush mask.
3881 * @param fCImpl The IEM_CIMPL_F_XXX flags.
3882 * @param fGstShwFlush The starting flush mask.
3883 */
3884DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
3885{
3886 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
3887 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
3888 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
3889 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
3890 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
3891 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
3892 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
3893 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
3894 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
3895 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
3896 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
3897 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
3898 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3899 return fGstShwFlush;
3900}
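/*
 * A worked example (hypothetical caller): passing only IEM_CIMPL_F_BRANCH_STACK and no
 * flags-related bits yields, per the logic above, just the xSP shadow bit on top of the
 * starting mask.
 */
#if 0
    uint64_t const fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_STACK, 0);
    Assert(fGstShwFlush == RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP));
    iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
#endif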
3901
3902
3903/**
3904 * Emits a call to a CImpl function or something similar.
3905 */
3906static uint32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
3907 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3908 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3909{
3910 /*
3911 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
3912 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
3913 */
3914 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
3915 fGstShwFlush
3916 | RT_BIT_64(kIemNativeGstReg_Pc)
3917 | RT_BIT_64(kIemNativeGstReg_EFlags));
3918 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3919
3920 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3921
3922 /*
3923 * Load the parameters.
3924 */
3925#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3926 /* Special-case the hidden VBOXSTRICTRC pointer. */
3927 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3928 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3929 if (cAddParams > 0)
3930 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3931 if (cAddParams > 1)
3932 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3933 if (cAddParams > 2)
3934 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3935 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3936
3937#else
3938 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3939 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3940 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3941 if (cAddParams > 0)
3942 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3943 if (cAddParams > 1)
3944 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3945 if (cAddParams > 2)
3946# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3947 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3948# else
3949 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3950# endif
3951#endif
3952
3953 /*
3954 * Make the call.
3955 */
3956 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3957
3958#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3959 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3960#endif
3961
3962 /*
3963 * Check the status code.
3964 */
3965 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3966}
3967
3968
3969/**
3970 * Emits a call to a threaded worker function.
3971 */
3972static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3973{
3974 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3975 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3976
3977#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3978 /* The threaded function may throw / long jmp, so set current instruction
3979 number if we're counting. */
3980 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
3981#endif
3982
3983 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3984
3985#ifdef RT_ARCH_AMD64
3986 /* Load the parameters and emit the call. */
3987# ifdef RT_OS_WINDOWS
3988# ifndef VBOXSTRICTRC_STRICT_ENABLED
3989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3990 if (cParams > 0)
3991 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3992 if (cParams > 1)
3993 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3994 if (cParams > 2)
3995 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3996# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3997 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3998 if (cParams > 0)
3999 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
4000 if (cParams > 1)
4001 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
4002 if (cParams > 2)
4003 {
4004 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
4005 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
4006 }
4007 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4008# endif /* VBOXSTRICTRC_STRICT_ENABLED */
4009# else
4010 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4011 if (cParams > 0)
4012 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
4013 if (cParams > 1)
4014 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
4015 if (cParams > 2)
4016 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
4017# endif
4018
4019 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4020
4021# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
4022 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4023# endif
4024
4025#elif RT_ARCH_ARM64
4026 /*
4027 * ARM64:
4028 */
4029 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4030 if (cParams > 0)
4031 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
4032 if (cParams > 1)
4033 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
4034 if (cParams > 2)
4035 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
4036
4037 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
4038
4039#else
4040# error "port me"
4041#endif
4042
4043 /*
4044 * Check the status code.
4045 */
4046 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
4047
4048 return off;
4049}
4050
4051
4052/**
4053 * Emits the code at the RaiseGP0 label.
4054 */
4055static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4056{
4057 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
4058 if (idxLabel != UINT32_MAX)
4059 {
4060 iemNativeLabelDefine(pReNative, idxLabel, off);
4061
4062 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
4063 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4064#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4065 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
4066#endif
4067 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
4068
4069 /* jump back to the return sequence. */
4070 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4071 }
4072 return off;
4073}
4074
4075
4076/**
4077 * Emits the code at the ReturnWithFlags label (returns
4078 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
4079 */
4080static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4081{
4082 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
4083 if (idxLabel != UINT32_MAX)
4084 {
4085 iemNativeLabelDefine(pReNative, idxLabel, off);
4086
4087 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
4088
4089 /* jump back to the return sequence. */
4090 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4091 }
4092 return off;
4093}
4094
4095
4096/**
4097 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
4098 */
4099static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4100{
4101 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
4102 if (idxLabel != UINT32_MAX)
4103 {
4104 iemNativeLabelDefine(pReNative, idxLabel, off);
4105
4106 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
4107
4108 /* jump back to the return sequence. */
4109 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4110 }
4111 return off;
4112}
4113
4114
4115/**
4116 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
4117 */
4118static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4119{
4120 /*
4121 * Generate the rc + rcPassUp fiddling code if needed.
4122 */
4123 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4124 if (idxLabel != UINT32_MAX)
4125 {
4126 iemNativeLabelDefine(pReNative, idxLabel, off);
4127
4128 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
4129#ifdef RT_ARCH_AMD64
4130# ifdef RT_OS_WINDOWS
4131# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4132 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
4133# endif
4134 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4135 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
4136# else
4137 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4138 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
4139# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4140 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
4141# endif
4142# endif
4143# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4144 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
4145# endif
4146
4147#else
4148 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
4149 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4150 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
4151#endif
4152
4153 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
4154 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4155 }
4156 return off;
4157}
4158
4159
4160/**
4161 * Emits a standard epilog.
4162 */
4163static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
4164{
4165 *pidxReturnLabel = UINT32_MAX;
4166
4167 /*
4168 * Successful return, so clear the return register (eax, w0).
4169 */
4170 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
4171
4172 /*
4173 * Define label for common return point.
4174 */
4175 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
4176 *pidxReturnLabel = idxReturn;
4177
4178 /*
4179 * Restore registers and return.
4180 */
4181#ifdef RT_ARCH_AMD64
4182 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4183
4184 /* Reposition rsp at the r15 restore point. */
4185 pbCodeBuf[off++] = X86_OP_REX_W;
4186 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
4187 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
4188 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
4189
4190 /* Pop non-volatile registers and return */
4191 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
4192 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
4193 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
4194 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
4195 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
4196 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
4197 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
4198 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
4199# ifdef RT_OS_WINDOWS
4200 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
4201 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
4202# endif
4203 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
4204 pbCodeBuf[off++] = 0xc9; /* leave */
4205 pbCodeBuf[off++] = 0xc3; /* ret */
4206 pbCodeBuf[off++] = 0xcc; /* int3 poison */
4207
4208#elif RT_ARCH_ARM64
4209 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4210
4211 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
4212 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
4213 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4214 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4215 IEMNATIVE_FRAME_VAR_SIZE / 8);
4216 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
4217 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4218 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4219 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4220 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4221 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4222 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4223 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4224 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4225 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4226 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4227 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4228
4229 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
4230 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
4231 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
4232 IEMNATIVE_FRAME_SAVE_REG_SIZE);
4233
4234 /* retab / ret */
4235# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
4236 if (1)
4237 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
4238 else
4239# endif
4240 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
4241
4242#else
4243# error "port me"
4244#endif
4245 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4246
4247 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
4248}
4249
4250
4251/**
4252 * Emits a standard prolog.
4253 */
4254static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4255{
4256#ifdef RT_ARCH_AMD64
4257 /*
4258 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
4259 * reserving 64 bytes for stack variables plus 4 non-register argument
4260 * slots. Fixed register assignment: xBX = pVCpu;
4261 *
4262 * Since we always do the same register spilling, we can use the same
4263 * unwind description for all the code.
4264 */
4265 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4266 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
4267 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
4268 pbCodeBuf[off++] = 0x8b;
4269 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
4270 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
4271 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
4272# ifdef RT_OS_WINDOWS
4273 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
4274 pbCodeBuf[off++] = 0x8b;
4275 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
4276 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
4277 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
4278# else
4279 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
4280 pbCodeBuf[off++] = 0x8b;
4281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
4282# endif
4283 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
4284 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
4285 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
4286 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
4287 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
4288 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
4289 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
4290 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
4291
4292 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
4293 X86_GREG_xSP,
4294 IEMNATIVE_FRAME_ALIGN_SIZE
4295 + IEMNATIVE_FRAME_VAR_SIZE
4296 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
4297 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
4298 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
4299 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
4300 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
4301
4302#elif RT_ARCH_ARM64
4303 /*
4304 * We set up a stack frame exactly like on x86, only we have to push the
4305 * return address ourselves here. We save all non-volatile registers.
4306 */
4307 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4308
4309# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
4310 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
4311 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
4312 * in any way conditional, so we're just emitting this instruction now and hoping for the best... */
4313 /* pacibsp */
4314 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
4315# endif
4316
4317 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
4318 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
4319 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4320 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4321 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
4322 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
4323 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4324 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4325 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4326 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4327 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4328 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4329 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4330 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4331 /* Save the BP and LR (ret address) registers at the top of the frame. */
4332 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4333 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4334 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4335 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
4336 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
4337 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
4338
4339 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
4340 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
4341
4342 /* mov r28, r0 */
4343 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
4344 /* mov r27, r1 */
4345 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
4346
4347#else
4348# error "port me"
4349#endif
4350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4351 return off;
4352}
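
/*
 * Illustration only (not compiled as part of this file): with the
 * IEMNATIVE_FRAME_SAVE_REG_SIZE of 12*8 = 96 bytes asserted above, the ARM64
 * prolog lays the register save area out like this (offsets relative to SP
 * right after the initial pre-indexed stp):
 *      SP + 0x00: x19, x20
 *      SP + 0x10: x21, x22
 *      SP + 0x20: x23, x24
 *      SP + 0x30: x25, x26
 *      SP + 0x40: x27, x28
 *      SP + 0x50: bp (x29), lr (x30)
 * BP is then pointed at the saved BP slot (SP + 0x50) and the variable area of
 * IEMNATIVE_FRAME_VAR_SIZE bytes is allocated below the save area by the final
 * 'sub sp' instruction.
 */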
4353
4354
4355
4356
4357/*********************************************************************************************************************************
4358* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
4359*********************************************************************************************************************************/
4360
4361#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
4362 { \
4363 Assert(pReNative->Core.bmVars == 0); \
4364 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
4365 Assert(pReNative->Core.bmStack == 0); \
4366 pReNative->fMc = (a_fMcFlags); \
4367 pReNative->fCImpl = (a_fCImplFlags); \
4368 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
4369
4370/** We have to get to the end in recompilation mode, as otherwise we won't
4371 * generate code for all the IEM_MC_IF_XXX branches. */
4372#define IEM_MC_END() \
4373 iemNativeVarFreeAll(pReNative); \
4374 } return off
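
/*
 * Usage sketch (illustration only; simplified): a recompiled MC block body is
 * bracketed by these macros, e.g.
 *      IEM_MC_BEGIN(0, 0, 0, 0);
 *      ...body emitters...
 *      IEM_MC_END();
 * IEM_MC_BEGIN asserts a clean variable/stack state and records the MC and
 * CImpl flags plus the (hidden-argument adjusted) argument count, while
 * IEM_MC_END frees all variables and returns the current code offset.
 */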
4375
4376
4377
4378/*********************************************************************************************************************************
4379* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
4380*********************************************************************************************************************************/
4381
4382#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
4383 pReNative->fMc = 0; \
4384 pReNative->fCImpl = (a_fFlags); \
4385 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
4386
4387
4388#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4389 pReNative->fMc = 0; \
4390 pReNative->fCImpl = (a_fFlags); \
4391 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
4392
4393DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4394 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4395 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
4396{
4397 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
4398}
4399
4400
4401#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4402 pReNative->fMc = 0; \
4403 pReNative->fCImpl = (a_fFlags); \
4404 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4405 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
4406
4407DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4408 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4409 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
4410{
4411 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
4412}
4413
4414
4415#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4416 pReNative->fMc = 0; \
4417 pReNative->fCImpl = (a_fFlags); \
4418 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4419 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
4420
4421DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4422 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4423 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
4424 uint64_t uArg2)
4425{
4426 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
4427}
4428
4429
4430
4431/*********************************************************************************************************************************
4432* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
4433*********************************************************************************************************************************/
4434
4435/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
4436 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
4437DECL_INLINE_THROW(uint32_t)
4438iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4439{
4440 /*
4441 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
4442 * return with a special status code and make the execution loop deal with
4443 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
4444 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
4445 * could continue w/o interruption, it will probably drop into the
4446 * debugger, so it's not worth the effort of trying to service it here; we
4447 * just lump it in with the handling of the others.
4448 *
4449 * To simplify the code and the register state management even more (wrt
4450 * the immediate in the AND operation), we always update the flags and
4451 * skip the extra check and its associated conditional jump.
4452 */
4453 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
4454 <= UINT32_MAX);
4455 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4456 kIemNativeGstRegUse_ForUpdate);
4457 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
4458 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
4459 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
4460 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
4461 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4462
4463 /* Free but don't flush the EFLAGS register. */
4464 iemNativeRegFreeTmp(pReNative, idxEflReg);
4465
4466 return off;
4467}
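
/*
 * Rough C equivalent of the code emitted above (sketch only):
 *      uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          goto ReturnWithFlags;   // special status, handled by the execution loop
 *      fEfl &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      pVCpu->cpum.GstCtx.eflags.u = fEfl;
 */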
4468
4469
4470#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4471 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4472
4473#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4474 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4475 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4476
4477/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4478DECL_INLINE_THROW(uint32_t)
4479iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4480{
4481 /* Allocate a temporary PC register. */
4482 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4483
4484 /* Perform the addition and store the result. */
4485 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4486 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4487
4488 /* Free but don't flush the PC register. */
4489 iemNativeRegFreeTmp(pReNative, idxPcReg);
4490
4491 return off;
4492}
4493
4494
4495#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4496 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4497
4498#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4499 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4500 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4501
4502/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4503DECL_INLINE_THROW(uint32_t)
4504iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4505{
4506 /* Allocate a temporary PC register. */
4507 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4508
4509 /* Perform the addition and store the result. */
4510 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4511 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4512
4513 /* Free but don't flush the PC register. */
4514 iemNativeRegFreeTmp(pReNative, idxPcReg);
4515
4516 return off;
4517}
4518
4519
4520#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4521 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4522
4523#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4524 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4525 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4526
4527/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4528DECL_INLINE_THROW(uint32_t)
4529iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4530{
4531 /* Allocate a temporary PC register. */
4532 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4533
4534 /* Perform the addition and store the result. */
4535 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4536 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4537 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4538
4539 /* Free but don't flush the PC register. */
4540 iemNativeRegFreeTmp(pReNative, idxPcReg);
4541
4542 return off;
4543}
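
/*
 * For reference, a rough C equivalent of the three advance emitters above
 * (sketch only; the full 64-bit rip field is written back in all cases,
 * rip standing for pVCpu->cpum.GstCtx.rip):
 *      PC64: rip = rip + cbInstr;
 *      PC32: rip = (uint32_t)((uint32_t)rip + cbInstr);
 *      PC16: rip = (uint16_t)((uint32_t)rip + cbInstr);
 */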
4544
4545
4546
4547/*********************************************************************************************************************************
4548* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4549*********************************************************************************************************************************/
4550
4551#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4552 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4553 (a_enmEffOpSize), pCallEntry->idxInstr)
4554
4555#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4556 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4557 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4558
4559#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4560 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4561 IEMMODE_16BIT, pCallEntry->idxInstr)
4562
4563#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4564 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4565 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4566
4567#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4568 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4569 IEMMODE_64BIT, pCallEntry->idxInstr)
4570
4571#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4572 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4573 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4574
4575/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4576 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4577 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4578DECL_INLINE_THROW(uint32_t)
4579iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4580 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4581{
4582 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4583
4584 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4585 off = iemNativeRegFlushPendingWrites(pReNative, off);
4586
4587 /* Allocate a temporary PC register. */
4588 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4589
4590 /* Perform the addition. */
4591 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4592
4593 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4594 {
4595 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4596 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4597 }
4598 else
4599 {
4600 /* Just truncate the result to 16-bit IP. */
4601 Assert(enmEffOpSize == IEMMODE_16BIT);
4602 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4603 }
4604 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4605
4606 /* Free but don't flush the PC register. */
4607 iemNativeRegFreeTmp(pReNative, idxPcReg);
4608
4609 return off;
4610}
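
/*
 * Rough C equivalent of the emitter above (sketch only):
 *      uint64_t uNewPc = pVCpu->cpum.GstCtx.rip + cbInstr + offDisp;
 *      if (enmEffOpSize == IEMMODE_64BIT)
 *          ;   // raise #GP(0) and exit the TB if uNewPc isn't canonical
 *      else
 *          uNewPc = (uint16_t)uNewPc;  // 16-bit operand size truncates to IP
 *      pVCpu->cpum.GstCtx.rip = uNewPc;
 */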
4611
4612
4613#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4614 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4615 (a_enmEffOpSize), pCallEntry->idxInstr)
4616
4617#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4618 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4619 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4620
4621#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4622 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4623 IEMMODE_16BIT, pCallEntry->idxInstr)
4624
4625#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4626 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4627 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4628
4629#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4630 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4631 IEMMODE_32BIT, pCallEntry->idxInstr)
4632
4633#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4634 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4635 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4636
4637/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4638 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4639 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4640DECL_INLINE_THROW(uint32_t)
4641iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4642 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4643{
4644 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4645
4646 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4647 off = iemNativeRegFlushPendingWrites(pReNative, off);
4648
4649 /* Allocate a temporary PC register. */
4650 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4651
4652 /* Perform the addition. */
4653 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4654
4655 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4656 if (enmEffOpSize == IEMMODE_16BIT)
4657 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4658
4659 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4660 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4661
4662 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4663
4664 /* Free but don't flush the PC register. */
4665 iemNativeRegFreeTmp(pReNative, idxPcReg);
4666
4667 return off;
4668}
4669
4670
4671#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4672 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4673
4674#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4675 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4676 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4677
4678#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4679 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4680
4681#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4682 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4683 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4684
4685#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4686 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4687
4688#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4689 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4690 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4691
4692/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4693DECL_INLINE_THROW(uint32_t)
4694iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4695 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4696{
4697 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4698 off = iemNativeRegFlushPendingWrites(pReNative, off);
4699
4700 /* Allocate a temporary PC register. */
4701 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4702
4703 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4704 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4705 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4706 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4707 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4708
4709 /* Free but don't flush the PC register. */
4710 iemNativeRegFreeTmp(pReNative, idxPcReg);
4711
4712 return off;
4713}
4714
4715
4716
4717/*********************************************************************************************************************************
4718* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4719*********************************************************************************************************************************/
4720
4721/**
4722 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4723 *
4724 * @returns Pointer to the condition stack entry.
4725 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
4726 */
4727DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4728{
4729 uint32_t const idxStack = pReNative->cCondDepth;
4730 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4731
4732 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4733 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4734
4735 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4736 pEntry->fInElse = false;
4737 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4738 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4739
4740 return pEntry;
4741}
4742
4743
4744/**
4745 * Start of the if-block, snapshotting the register and variable state.
4746 */
4747DECL_INLINE_THROW(void)
4748iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4749{
4750 Assert(offIfBlock != UINT32_MAX);
4751 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4752 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4753 Assert(!pEntry->fInElse);
4754
4755 /* Define the start of the IF block if requested or for disassembly purposes. */
4756 if (idxLabelIf != UINT32_MAX)
4757 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4758#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4759 else
4760 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4761#else
4762 RT_NOREF(offIfBlock);
4763#endif
4764
4765 /* Copy the initial state so we can restore it in the 'else' block. */
4766 pEntry->InitialState = pReNative->Core;
4767}
4768
4769
4770#define IEM_MC_ELSE() } while (0); \
4771 off = iemNativeEmitElse(pReNative, off); \
4772 do {
4773
4774/** Emits code related to IEM_MC_ELSE. */
4775DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4776{
4777 /* Check sanity and get the conditional stack entry. */
4778 Assert(off != UINT32_MAX);
4779 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4780 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4781 Assert(!pEntry->fInElse);
4782
4783 /* Jump to the endif */
4784 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4785
4786 /* Define the else label and enter the else part of the condition. */
4787 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4788 pEntry->fInElse = true;
4789
4790 /* Snapshot the core state so we can do a merge at the endif and restore
4791 the snapshot we took at the start of the if-block. */
4792 pEntry->IfFinalState = pReNative->Core;
4793 pReNative->Core = pEntry->InitialState;
4794
4795 return off;
4796}
4797
4798
4799#define IEM_MC_ENDIF() } while (0); \
4800 off = iemNativeEmitEndIf(pReNative, off)
4801
4802/** Emits code related to IEM_MC_ENDIF. */
4803DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4804{
4805 /* Check sanity and get the conditional stack entry. */
4806 Assert(off != UINT32_MAX);
4807 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4808 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4809
4810 /*
4811 * Now we have to find common ground with the core state at the end of the
4812 * other branch (if-final or initial state). Use the smallest common
4813 * denominator and just drop anything that isn't the same in both states.
4814 */
4815 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4816 * which is why we're doing this at the end of the else-block.
4817 * But we'd need more info about the future for that to be worth the effort. */
4818 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4819 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4820 {
4821 /* shadow guest stuff first. */
4822 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4823 if (fGstRegs)
4824 {
4825 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4826 do
4827 {
4828 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4829 fGstRegs &= ~RT_BIT_64(idxGstReg);
4830
4831 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4832 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4833 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4834 {
4835 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
4836 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4837 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4838 }
4839 } while (fGstRegs);
4840 }
4841 else
4842 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4843
4844 /* Check variables next. For now we must require them to be identical
4845 or stuff we can recreate. */
4846 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4847 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4848 if (fVars)
4849 {
4850 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4851 do
4852 {
4853 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4854 fVars &= ~RT_BIT_32(idxVar);
4855
4856 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4857 {
4858 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4859 continue;
4860 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4861 {
4862 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4863 if (idxHstReg != UINT8_MAX)
4864 {
4865 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4866 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4867 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4868 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4869 }
4870 continue;
4871 }
4872 }
4873 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4874 continue;
4875
4876 /* Irreconcilable, so drop it. */
4877 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4878 if (idxHstReg != UINT8_MAX)
4879 {
4880 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4881 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4882 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4883 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4884 }
4885 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4886 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4887 } while (fVars);
4888 }
4889
4890 /* Finally, check that the host register allocations match. */
4891 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4892 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4893 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4894 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4895 }
4896
4897 /*
4898 * Define the endif label and maybe the else one if we're still in the 'if' part.
4899 */
4900 if (!pEntry->fInElse)
4901 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4902 else
4903 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4904 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4905
4906 /* Pop the conditional stack. */
4907 pReNative->cCondDepth -= 1;
4908
4909 return off;
4910}
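
/*
 * Shape of the code generated for a conditional (illustration only):
 *
 *          <test> ; jcc Else_N         ; emitted by the IEM_MC_IF_XXX emitter
 *          <if-block>
 *          jmp Endif_N                 ; iemNativeEmitElse
 *      Else_N:
 *          <else-block>
 *      Endif_N:                        ; iemNativeEmitEndIf
 *
 * When there is no IEM_MC_ELSE, iemNativeEmitEndIf defines Else_N at the same
 * offset as Endif_N, so the conditional jump simply targets the code
 * following the block.
 */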
4911
4912
4913#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4914 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4915 do {
4916
4917/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4918DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4919{
4920 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4921
4922 /* Get the eflags. */
4923 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4924 kIemNativeGstRegUse_ReadOnly);
4925
4926 /* Test and jump. */
4927 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4928
4929 /* Free but don't flush the EFlags register. */
4930 iemNativeRegFreeTmp(pReNative, idxEflReg);
4931
4932 /* Make a copy of the core state now as we start the if-block. */
4933 iemNativeCondStartIfBlock(pReNative, off);
4934
4935 return off;
4936}
4937
4938
4939#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4940 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4941 do {
4942
4943/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4944DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4945{
4946 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4947
4948 /* Get the eflags. */
4949 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4950 kIemNativeGstRegUse_ReadOnly);
4951
4952 /* Test and jump. */
4953 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4954
4955 /* Free but don't flush the EFlags register. */
4956 iemNativeRegFreeTmp(pReNative, idxEflReg);
4957
4958 /* Make a copy of the core state now as we start the if-block. */
4959 iemNativeCondStartIfBlock(pReNative, off);
4960
4961 return off;
4962}
4963
4964
4965#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4966 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4967 do {
4968
4969/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4970DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4971{
4972 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4973
4974 /* Get the eflags. */
4975 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4976 kIemNativeGstRegUse_ReadOnly);
4977
4978 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4979 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4980
4981 /* Test and jump. */
4982 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4983
4984 /* Free but don't flush the EFlags register. */
4985 iemNativeRegFreeTmp(pReNative, idxEflReg);
4986
4987 /* Make a copy of the core state now as we start the if-block. */
4988 iemNativeCondStartIfBlock(pReNative, off);
4989
4990 return off;
4991}
4992
4993
4994#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4995 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4996 do {
4997
4998/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4999DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
5000{
5001 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5002
5003 /* Get the eflags. */
5004 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5005 kIemNativeGstRegUse_ReadOnly);
5006
5007 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5008 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5009
5010 /* Test and jump. */
5011 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5012
5013 /* Free but don't flush the EFlags register. */
5014 iemNativeRegFreeTmp(pReNative, idxEflReg);
5015
5016 /* Make a copy of the core state now as we start the if-block. */
5017 iemNativeCondStartIfBlock(pReNative, off);
5018
5019 return off;
5020}
5021
5022
5023#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
5024 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
5025 do {
5026
5027#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
5028 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
5029 do {
5030
5031/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
5032DECL_INLINE_THROW(uint32_t)
5033iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5034 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
5035{
5036 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5037
5038 /* Get the eflags. */
5039 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5040 kIemNativeGstRegUse_ReadOnly);
5041
5042 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
5043 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
5044
5045 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
5046 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
5047 Assert(iBitNo1 != iBitNo2);
5048
5049#ifdef RT_ARCH_AMD64
5050 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
5051
5052 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5053 if (iBitNo1 > iBitNo2)
5054 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
5055 else
5056 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
5057 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5058
5059#elif defined(RT_ARCH_ARM64)
5060 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5061 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5062
5063 /* and tmpreg, eflreg, #1<<iBitNo1 */
5064 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
5065
5066 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
5067 if (iBitNo1 > iBitNo2)
5068 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5069 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
5070 else
5071 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5072 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
5073
5074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5075
5076#else
5077# error "Port me"
5078#endif
5079
5080 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
5081 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
5082 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
5083
5084 /* Free but don't flush the EFlags and tmp registers. */
5085 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5086 iemNativeRegFreeTmp(pReNative, idxEflReg);
5087
5088 /* Make a copy of the core state now as we start the if-block. */
5089 iemNativeCondStartIfBlock(pReNative, off);
5090
5091 return off;
5092}
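
/*
 * The bit fiddling above, expressed in C (sketch only): after isolating bit
 * #iBitNo1 and shifting it onto position #iBitNo2, the XOR with EFLAGS leaves
 * bit #iBitNo2 set exactly when the two flag bits differ:
 *      uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);
 *      uTmp = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2) : uTmp << (iBitNo2 - iBitNo1);
 *      uTmp ^= fEfl;
 *      bool const fBitsDiffer = RT_BOOL(uTmp & RT_BIT_32(iBitNo2));
 */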
5093
5094
5095#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
5096 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
5097 do {
5098
5099#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
5100 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
5101 do {
5102
5103/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
5104 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
5105DECL_INLINE_THROW(uint32_t)
5106iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
5107 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
5108{
5109 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5110
5111 /* We need an if-block label for the inverted variant. */
5112 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
5113 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
5114
5115 /* Get the eflags. */
5116 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5117 kIemNativeGstRegUse_ReadOnly);
5118
5119 /* Translate the flag masks to bit numbers. */
5120 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5121 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5122
5123 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
5124 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
5125 Assert(iBitNo1 != iBitNo);
5126
5127 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
5128 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
5129 Assert(iBitNo2 != iBitNo);
5130 Assert(iBitNo2 != iBitNo1);
5131
5132#ifdef RT_ARCH_AMD64
5133 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
5134#elif defined(RT_ARCH_ARM64)
5135 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5136#endif
5137
5138 /* Check for the lone bit first. */
5139 if (!fInverted)
5140 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5141 else
5142 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
5143
5144 /* Then extract and compare the other two bits. */
5145#ifdef RT_ARCH_AMD64
5146 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5147 if (iBitNo1 > iBitNo2)
5148 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
5149 else
5150 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
5151 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5152
5153#elif defined(RT_ARCH_ARM64)
5154 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5155
5156 /* and tmpreg, eflreg, #1<<iBitNo1 */
5157 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
5158
5159 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
5160 if (iBitNo1 > iBitNo2)
5161 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5162 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
5163 else
5164 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5165 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
5166
5167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5168
5169#else
5170# error "Port me"
5171#endif
5172
5173 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
5174 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
5175 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
5176
5177 /* Free but don't flush the EFlags and tmp registers. */
5178 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5179 iemNativeRegFreeTmp(pReNative, idxEflReg);
5180
5181 /* Make a copy of the core state now as we start the if-block. */
5182 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
5183
5184 return off;
5185}
5186
5187
5188#define IEM_MC_IF_CX_IS_NZ() \
5189 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
5190 do {
5191
5192/** Emits code for IEM_MC_IF_CX_IS_NZ. */
5193DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5194{
5195 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5196
5197 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5198 kIemNativeGstRegUse_ReadOnly);
5199 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5200 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5201
5202 iemNativeCondStartIfBlock(pReNative, off);
5203 return off;
5204}
5205
5206
5207#define IEM_MC_IF_ECX_IS_NZ() \
5208 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
5209 do {
5210
5211#define IEM_MC_IF_RCX_IS_NZ() \
5212 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
5213 do {
5214
5215/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
5216DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
5217{
5218 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5219
5220 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5221 kIemNativeGstRegUse_ReadOnly);
5222 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5223 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5224
5225 iemNativeCondStartIfBlock(pReNative, off);
5226 return off;
5227}
5228
5229
5230#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5231 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
5232 do {
5233
5234#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5235 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
5236 do {
5237
5238/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
5239DECL_INLINE_THROW(uint32_t)
5240iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
5241{
5242 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5243
5244 /* We have to load both RCX and EFLAGS before we can start branching,
5245 otherwise we'll end up in the else-block with an inconsistent
5246 register allocator state.
5247 Doing EFLAGS first as it's more likely to be loaded, right? */
5248 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5249 kIemNativeGstRegUse_ReadOnly);
5250 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5251 kIemNativeGstRegUse_ReadOnly);
5252
5253 /** @todo we could reduce this to a single branch instruction by spending a
5254 * temporary register and some setnz stuff. Not sure if loops are
5255 * worth it. */
5256 /* Check CX. */
5257 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5258
5259 /* Check the EFlags bit. */
5260 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5261 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5262 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5263 !fCheckIfSet /*fJmpIfSet*/);
5264
5265 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5266 iemNativeRegFreeTmp(pReNative, idxEflReg);
5267
5268 iemNativeCondStartIfBlock(pReNative, off);
5269 return off;
5270}
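
/*
 * The condition implemented above, in C terms (sketch only; fEfl denotes the
 * guest EFLAGS value):
 *      if ((uint16_t)pVCpu->cpum.GstCtx.rcx != 0 && RT_BOOL(fEfl & a_fBit) == fCheckIfSet)
 *          // if-block
 *      else
 *          // else-block
 */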
5271
5272
5273#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5274 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
5275 do {
5276
5277#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5278 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
5279 do {
5280
5281#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5282 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
5283 do {
5284
5285#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5286 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
5287 do {
5288
5289/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
5290 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
5291 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
5292 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
5293DECL_INLINE_THROW(uint32_t)
5294iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5295 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
5296{
5297 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5298
5299 /* We have to load both RCX and EFLAGS before we can start branching,
5300 otherwise we'll end up in the else-block with an inconsistent
5301 register allocator state.
5302 Doing EFLAGS first as it's more likely to be loaded, right? */
5303 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5304 kIemNativeGstRegUse_ReadOnly);
5305 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5306 kIemNativeGstRegUse_ReadOnly);
5307
5308 /** @todo we could reduce this to a single branch instruction by spending a
5309 * temporary register and some setnz stuff. Not sure if loops are
5310 * worth it. */
5311 /* Check RCX/ECX. */
5312 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5313
5314 /* Check the EFlags bit. */
5315 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5316 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5317 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5318 !fCheckIfSet /*fJmpIfSet*/);
5319
5320 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5321 iemNativeRegFreeTmp(pReNative, idxEflReg);
5322
5323 iemNativeCondStartIfBlock(pReNative, off);
5324 return off;
5325}
5326
5327
5328
5329/*********************************************************************************************************************************
5330* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
5331*********************************************************************************************************************************/
5332/** Number of hidden arguments for CIMPL calls.
5333 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
5334#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5335# define IEM_CIMPL_HIDDEN_ARGS 3
5336#else
5337# define IEM_CIMPL_HIDDEN_ARGS 2
5338#endif
5339
5340#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
5341 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
5342
5343#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
5344 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
5345
5346#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
5347 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
5348
5349#define IEM_MC_LOCAL(a_Type, a_Name) \
5350 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
5351
5352#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
5353 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
5354
5355
5356/**
5357 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
5358 */
5359DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
5360{
5361 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
5362 return IEM_CIMPL_HIDDEN_ARGS;
5363 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
5364 return 1;
5365 return 0;
5366}
5367
5368
5369/**
5370 * Internal work that allocates a variable with kind set to
5371 * kIemNativeVarKind_Invalid and no current stack allocation.
5372 *
5373 * The kind will either be set by the caller or later when the variable is first
5374 * assigned a value.
5375 */
5376static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5377{
5378 Assert(cbType > 0 && cbType <= 64);
5379 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
5380 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
5381 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
5382 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5383 pReNative->Core.aVars[idxVar].cbVar = cbType;
5384 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5385 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5386 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
5387 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
5388 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
5389 pReNative->Core.aVars[idxVar].u.uValue = 0;
5390 return idxVar;
5391}
5392
5393
5394/**
5395 * Internal work that allocates an argument variable w/o setting enmKind.
5396 */
5397static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5398{
5399 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
5400 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5401 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
5402
5403 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5404 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
5405 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
5406 return idxVar;
5407}
5408
5409
5410/**
5411 * Gets the stack slot for a stack variable, allocating one if necessary.
5412 *
5413 * Calling this function implies that the stack slot will contain a valid
5414 * variable value. The caller deals with any register currently assigned to the
5415 * variable, typically by spilling it into the stack slot.
5416 *
5417 * @returns The stack slot number.
5418 * @param pReNative The recompiler state.
5419 * @param idxVar The variable.
5420 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
5421 */
5422static uint8_t iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5423{
5424 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5425 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5426
5427 /* Already got a slot? */
5428 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5429 if (idxStackSlot != UINT8_MAX)
5430 {
5431 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
5432 return idxStackSlot;
5433 }
5434
5435 /*
5436 * A single slot is easy to allocate.
5437 * Allocate them from the top end, closest to BP, to reduce the displacement.
5438 */
5439 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
5440 {
5441 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
5442 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5443 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
5444 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
5445 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
5446 return (uint8_t)iSlot;
5447 }
5448
5449 /*
5450 * We need more than one stack slot.
5451 *
5452 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
5453 */
5454 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
5455 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
5456 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
5457 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
5458 uint32_t bmStack = ~pReNative->Core.bmStack;
5459 while (bmStack != UINT32_MAX)
5460 {
5461/** @todo allocate from the top to reduce BP displacement. */
5462 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
5463 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5464 if (!(iSlot & fBitAlignMask))
5465 {
5466 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
5467 {
5468 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
5469 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
5470 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
5471 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
5472 return (uint8_t)iSlot;
5473 }
5474 }
5475 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
5476 }
5477 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5478}
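
/*
 * Worked example for the multi-slot path above (sketch): a 32 byte variable
 * gives fBitAlignMask = 3 and fBitAllocMask = 0xf, so the loop searches the
 * (inverted) allocation bitmap for four consecutive free 8-byte slots starting
 * at a slot index that is a multiple of four; the whole run is then marked
 * allocated in Core.bmStack.
 */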
5479
5480
5481/**
5482 * Changes the variable to a stack variable.
5483 *
5484 * Currently this is only possible to do the first time the variable is used;
5485 * switching later can be implemented but hasn't been done.
5486 *
5487 * @param pReNative The recompiler state.
5488 * @param idxVar The variable.
5489 * @throws VERR_IEM_VAR_IPE_2
5490 */
5491static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5492{
5493 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5494 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5495 {
5496 /* We could in theory transition from immediate to stack as well, but it
5497 would involve the caller doing work storing the value on the stack. So,
5498 till that's required we only allow transition from invalid. */
5499 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5500 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5501 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5502 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
5503
5504 /* Note! We don't allocate a stack slot here, that's only done when a
5505 slot is actually needed to hold a variable value. */
5506 }
5507}
5508
5509
5510/**
5511 * Sets the variable to a constant (immediate) value.
5512 *
5513 * This does not require stack storage as we know the value and can always
5514 * reload it, unless of course it's referenced.
5515 *
5516 * @param pReNative The recompiler state.
5517 * @param idxVar The variable.
5518 * @param uValue The immediate value.
5519 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5520 */
5521static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5522{
5523 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5524 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5525 {
5526 /* Only simple transitions for now. */
5527 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5528 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5529 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5530 }
5531 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5532
5533 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5534}
5535
5536
5537/**
5538 * Sets the variable to a reference (pointer) to @a idxOtherVar.
5539 *
5540 * This does not require stack storage as we know the value and can always
5541 * reload it. Loading is postponed till needed.
5542 *
5543 * @param pReNative The recompiler state.
5544 * @param idxVar The variable.
5545 * @param idxOtherVar The variable to take the (stack) address of.
5546 *
5547 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5548 */
5549static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5550{
5551 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5552 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5553
5554 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5555 {
5556 /* Only simple transitions for now. */
5557 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5558 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5559 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5560 }
5561 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5562
5563 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5564
5565 /* Update the other variable, ensure it's a stack variable. */
5566 /** @todo handle variables with const values... that'll go boom now. */
5567 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5568 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5569}
5570
5571
5572/**
5573 * Sets the variable to a reference (pointer) to a guest register reference.
5574 *
5575 * This does not require stack storage as we know the value and can always
5576 * reload it. Loading is postponed till needed.
5577 *
5578 * @param pReNative The recompiler state.
5579 * @param idxVar The variable.
5580 * @param enmRegClass The class of guest registers to reference.
5581 * @param idxReg The register within @a enmRegClass to reference.
5582 *
5583 * @throws VERR_IEM_VAR_IPE_2
5584 */
5585static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
5586 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
5587{
5588 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5589
5590 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
5591 {
5592 /* Only simple transitions for now. */
5593 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5594 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5595 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
5596 }
5597 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5598
5599 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
5600 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
5601}
5602
5603
5604DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5605{
5606 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5607}
5608
5609
5610DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5611{
5612 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5613 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5614 return idxVar;
5615}
5616
5617
5618DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5619{
5620 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5621 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5622 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5623 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5624
5625 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5626 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5627 return idxArgVar;
5628}
5629
5630
5631DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5632{
5633 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5634 /* Don't set the kind to stack now; leave that to the first use, since for instance
5635 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
5636 return idxVar;
5637}
5638
5639
5640DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5641{
5642 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5643 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5644 return idxVar;
5645}
5646
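
/*
 * Hypothetical usage sketch (compiled out, not part of the recompiler): how a
 * local variable and an argument referencing it interact. The names and call
 * sequence are illustrative only; the real callers are the generated MC blocks.
 */
#if 0
static void iemNativeVarLocalRefSketch(PIEMRECOMPILERSTATE pReNative)
{
    /* A freshly allocated local has no kind yet (kIemNativeVarKind_Invalid). */
    uint8_t const idxVarLocal = iemNativeVarAlloc(pReNative, sizeof(uint64_t));
    /* Taking its address for argument no. 1 turns it into a stack variable via
       iemNativeVarSetKindToLocalRef -> iemNativeVarSetKindToStack, since the
       pointer handed to the callee must refer to real storage. */
    uint8_t const idxArg1     = iemNativeArgAllocLocalRef(pReNative, 1 /*iArgNo*/, idxVarLocal);
    RT_NOREF(idxArg1);
}
#endif
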
5647
5648/**
5649 * Makes sure variable @a idxVar has a register assigned to it.
5650 *
5651 * @returns The host register number.
5652 * @param pReNative The recompiler state.
5653 * @param idxVar The variable.
5654 * @param poff Pointer to the instruction buffer offset.
5655 * Used in case a register needs to be freed up or the
5656 * value needs to be loaded off the stack.
5657 * @param fInitialized Set if the variable must already have been initialized.
5658 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
5659 * the case.
5660 */
5661DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
5662 uint32_t *poff, bool fInitialized = false)
5663{
5664 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5665 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
5666/** @todo we must mark the variable as active and add a release function to
5667 * mark it as inactive, otherwise temporary register allocations may
5668 * cause the variable to be spilled onto the stack. */
5669
5670 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5671 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5672 {
5673 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
5674 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5675 return idxReg;
5676 }
5677
5678 /*
5679 * If the kind of variable has not yet been set, default to 'stack'.
5680 */
5681 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
5682 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5683 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
5684 iemNativeVarSetKindToStack(pReNative, idxVar);
5685
5686 /*
5687 * We have to allocate a register for the variable, even if it's a stack one,
5688 * as we don't know whether modifications are being made to it before it's
5689 * finalized (todo: analyze and insert hints about that?).
5690 *
5691 * If we can, we try to get the correct register for argument variables. This
5692 * assumes that most argument variables are fetched as close as possible
5693 * to the actual call, so that there aren't any interfering hidden calls
5694 * (memory accesses, etc.) in between.
5695 *
5696 * If we cannot, or it's a local variable, we make sure no argument registers
5697 * that will be used by this MC block are allocated here, and we always
5698 * prefer non-volatile registers to avoid having to spill stuff for internal
5699 * calls.
5700 */
5701 /** @todo Have the python script detect too early argument value fetches and
5702 * warn about hidden calls causing less optimal code to be generated. */
5703
5704 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5705 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5706 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5707 {
5708 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5709 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
5710 }
5711 else
5712 {
5713 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5714 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5715 & ~pReNative->Core.bmHstRegsWithGstShadow
5716 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5717 & fNotArgsMask;
5718 if (fRegs)
5719 {
5720 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
5721 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5722 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5724 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5725 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
5726 }
5727 else
5728 {
5729 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5730 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5731 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5732 Log11(("iemNativeVarAllocRegister: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
5733 }
5734 }
5735 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5736 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5737
5738 /*
5739 * Load it off the stack if we've got a stack slot.
5740 */
5741 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5742 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
5743 {
5744 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
5745 switch (pReNative->Core.aVars[idxVar].cbVar)
5746 {
5747 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
5748 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
5749 case 3: AssertFailed(); RT_FALL_THRU();
5750 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
5751 default: AssertFailed(); RT_FALL_THRU();
5752 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
5753 }
5754 }
5755 else
5756 {
5757 Assert(idxStackSlot == UINT8_MAX);
5758 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5759 }
5760 return idxReg;
5761}
5762
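
/*
 * Illustrative sketch (compiled out): the register picking formula used above
 * when no matching argument register is available. The masks are made-up
 * stand-ins for the real IEMNATIVE_* constants.
 */
#if 0
static void iemNativeVarRegPickSketch(void)
{
    uint32_t const fRegs     = RT_BIT_32(10) | RT_BIT_32(12) | RT_BIT_32(14);   /* hypothetical free registers */
    uint32_t const fVolatile = RT_BIT_32(10);                                   /* stand-in volatile register mask */
    uint8_t  const idxReg    = (uint8_t)ASMBitLastSetU32(  fRegs & ~fVolatile
                                                         ? fRegs & ~fVolatile : fRegs) - 1;
    /* Picks register 14: the topmost free non-volatile register, matching the
       "pick from the top" strategy in iemNativeVarAllocRegister. */
    (void)idxReg;
}
#endif
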
5763
5764/**
5765 * The value of variable @a idxVar will be written in full to the @a enmGstReg
5766 * guest register.
5767 *
5768 * This function makes sure there is a register for it and sets it to be the
5769 * current shadow copy of @a enmGstReg.
5770 *
5771 * @returns The host register number.
5772 * @param pReNative The recompiler state.
5773 * @param idxVar The variable.
5774 * @param enmGstReg The guest register this variable will be written to
5775 * after this call.
5776 * @param poff Pointer to the instruction buffer offset.
5777 * In case a register needs to be freed up or if the
5778 * variable content needs to be loaded off the stack.
5779 *
5780 * @note We DO NOT expect @a idxVar to be an argument variable,
5781 * because this function is only used in the commit stage of an
5782 * instruction.
5783 */
5784DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegisterForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
5785 IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
5786{
5787 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5788 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
5789 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
5790 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
5791 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
5792 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
5793 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
5794
5795 /*
5796 * This shouldn't ever be used for arguments, unless it's in a weird else
5797 * branch that doesn't do any calling and even then it's questionable.
5798 *
5799 * However, in case someone writes crazy wrong MC code and does register
5800 * updates before making calls, just use the regular register allocator to
5801 * ensure we get a register suitable for the intended argument number.
5802 */
5803 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarAllocRegister(pReNative, idxVar, poff));
5804
5805 /*
5806 * If there is already a register for the variable, we transfer/set the
5807 * guest shadow copy assignment to it.
5808 */
5809 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5810 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5811 {
5812 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
5813 {
5814 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
5815 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
5816 Log12(("iemNativeVarAllocRegisterForGuestReg: Moved %s for guest %s into %s for full write\n",
5817 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
5818 }
5819 else
5820 {
5821 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
5822 Log12(("iemNativeVarAllocRegisterForGuestReg: Marking %s as copy of guest %s (full write)\n",
5823 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
5824 }
5825 /** @todo figure this one out. We need some way of making sure the register isn't
5826 * modified after this point, just in case we start writing crappy MC code. */
5827 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
5828 return idxReg;
5829 }
5830 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
5831
5832 /*
5833 * Because this is supposed to be the commit stage, we just tag along with the
5834 * temporary register allocator and upgrade the allocation to a variable register.
5835 */
5836 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
5837 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
5838 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
5839 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
5840 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
5841 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5842
5843 /*
5844 * Now we need to load the register value.
5845 */
5846 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
5847 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
5848 else
5849 {
5850 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
5851 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
5852 switch (pReNative->Core.aVars[idxVar].cbVar)
5853 {
5854 case sizeof(uint64_t):
5855 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
5856 break;
5857 case sizeof(uint32_t):
5858 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
5859 break;
5860 case sizeof(uint16_t):
5861 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
5862 break;
5863 case sizeof(uint8_t):
5864 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
5865 break;
5866 default:
5867 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
5868 }
5869 }
5870
5871 return idxReg;
5872}
5873
5874
5875/**
5876 * Sets the host register for @a idxVarRc to @a idxReg.
5877 *
5878 * The register must not be allocated. Any guest register shadowing will be
5879 * implicitly dropped by this call.
5880 *
5881 * The variable must not have any register associated with it (causes
5882 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
5883 * implied.
5884 *
5885 * @returns idxReg
5886 * @param pReNative The recompiler state.
5887 * @param idxVar The variable.
5888 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
5889 * @param off For recording in debug info.
5890 *
5891 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
5892 */
5893DECL_INLINE_THROW(uint8_t) iemNativeVarSetRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
5894{
5895 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5896 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5897 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
5898 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
5899
5900 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
5901 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5902
5903 iemNativeVarSetKindToStack(pReNative, idxVar);
5904 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5905
5906 return idxReg;
5907}
5908
5909
5910/**
5911 * Worker that frees the stack slots for variable @a idxVar if any allocated.
5912 *
5913 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
5914 */
5915DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5916{
5917 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5918 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
5919 {
5920 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
5921 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
5922 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
5923 Assert(cSlots > 0);
5924 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
5925 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
5926 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
5927 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5928 }
5929 else
5930 Assert(idxStackSlot == UINT8_MAX);
5931}
5932
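
/*
 * Illustrative sketch (compiled out): the slot count and free mask computed by
 * iemNativeVarFreeStackSlots for a hypothetical 24 byte variable in slot 8.
 */
#if 0
static void iemNativeVarFreeStackSlotsSketch(void)
{
    uint8_t  const cbVar        = 24;
    uint8_t  const cSlots       = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);  /* -> 3 slots */
    uint32_t const fAllocMask   = RT_BIT_32(cSlots) - 1;                              /* -> 0x7 */
    uint8_t  const idxStackSlot = 8;                                                  /* hypothetical base slot */
    uint32_t       bmStack      = UINT32_C(0x00000700);                               /* only this variable allocated */
    bmStack &= ~(fAllocMask << idxStackSlot);                                         /* frees all three slots -> 0 */
    (void)bmStack;
}
#endif
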
5933
5934/**
5935 * Worker that frees a single variable.
5936 *
5937 * ASSUMES that @a idxVar is valid.
5938 */
5939DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5940{
5941 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
5942 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
5943
5944 /* Free the host register first if any assigned. */
5945 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5946 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5947 {
5948 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5949 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5950 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5951 }
5952
5953 /* Free argument mapping. */
5954 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5955 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
5956 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
5957
5958 /* Free the stack slots. */
5959 iemNativeVarFreeStackSlots(pReNative, idxVar);
5960
5961 /* Free the actual variable. */
5962 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5963 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5964}
5965
5966
5967/**
5968 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
5969 */
5970DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
5971{
5972 while (bmVars != 0)
5973 {
5974 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
5975 bmVars &= ~RT_BIT_32(idxVar);
5976
5977#if 1 /** @todo optimize by simplifying this later... */
5978 iemNativeVarFreeOneWorker(pReNative, idxVar);
5979#else
5980 /* Only need to free the host register, the rest is done as bulk updates below. */
5981 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5982 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5983 {
5984 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5985 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5986 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5987 }
5988#endif
5989 }
5990#if 0 /** @todo optimize by simplifying this later... */
5991 pReNative->Core.bmVars = 0;
5992 pReNative->Core.bmStack = 0;
5993 pReNative->Core.u64ArgVars = UINT64_MAX;
5994#endif
5995}
5996
5997
5998/**
5999 * This is called by IEM_MC_END() to clean up all variables.
6000 */
6001DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
6002{
6003 uint32_t const bmVars = pReNative->Core.bmVars;
6004 if (bmVars != 0)
6005 iemNativeVarFreeAllSlow(pReNative, bmVars);
6006 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
6007 Assert(pReNative->Core.bmStack == 0);
6008}
6009
6010
6011#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
6012
6013/**
6014 * This is called by IEM_MC_FREE_LOCAL.
6015 */
6016DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6017{
6018 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6019 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
6020 iemNativeVarFreeOneWorker(pReNative, idxVar);
6021}
6022
6023
6024#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
6025
6026/**
6027 * This is called by IEM_MC_FREE_ARG.
6028 */
6029DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6030{
6031 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6032 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
6033 iemNativeVarFreeOneWorker(pReNative, idxVar);
6034}
6035
6036
6037
6038/*********************************************************************************************************************************
6039* Emitters for IEM_MC_CALL_CIMPL_XXX *
6040*********************************************************************************************************************************/
6041
6042/**
6043 * Emits code to load a reference to the given guest register into @a idxGprDst.
6044 */
6045DECL_INLINE_THROW(uint32_t)
6046iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
6047 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
6048{
6049 /*
6050 * Get the offset relative to the CPUMCTX structure.
6051 */
6052 uint32_t offCpumCtx;
6053 switch (enmClass)
6054 {
6055 case kIemNativeGstRegRef_Gpr:
6056 Assert(idxRegInClass < 16);
6057 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
6058 break;
6059
6060 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
6061 Assert(idxRegInClass < 4);
6062 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
6063 break;
6064
6065 case kIemNativeGstRegRef_EFlags:
6066 Assert(idxRegInClass == 0);
6067 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
6068 break;
6069
6070 case kIemNativeGstRegRef_MxCsr:
6071 Assert(idxRegInClass == 0);
6072 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
6073 break;
6074
6075 case kIemNativeGstRegRef_FpuReg:
6076 Assert(idxRegInClass < 8);
6077 AssertFailed(); /** @todo what kind of indexing? */
6078 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
6079 break;
6080
6081 case kIemNativeGstRegRef_MReg:
6082 Assert(idxRegInClass < 8);
6083 AssertFailed(); /** @todo what kind of indexing? */
6084 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
6085 break;
6086
6087 case kIemNativeGstRegRef_XReg:
6088 Assert(idxRegInClass < 16);
6089 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
6090 break;
6091
6092 default:
6093 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
6094 }
6095
6096 /*
6097 * Load the address into the destination register.
6098 */
6099#ifdef RT_ARCH_AMD64
6100 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
6101
6102#elif defined(RT_ARCH_ARM64)
6103 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6104 Assert(offCpumCtx < 4096);
6105 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
6106
6107#else
6108# error "Port me!"
6109#endif
6110
6111 return off;
6112}
6113
6114
6115/**
6116 * Common code for CIMPL and AIMPL calls.
6117 *
6118 * These are calls that use argument variables and such. They should not be
6119 * confused with internal calls required to implement an MC operation,
6120 * like a TLB load and similar.
6121 *
6122 * Upon return all that is left to do is to load any hidden arguments and
6123 * perform the call. All argument variables are freed.
6124 *
6125 * @returns New code buffer offset; throws VBox status code on error.
6126 * @param pReNative The native recompile state.
6127 * @param off The code buffer offset.
6128 * @param cArgs The total number of arguments (includes hidden
6129 * count).
6130 * @param cHiddenArgs The number of hidden arguments. The hidden
6131 * arguments must not have any variable declared for
6132 * them, whereas all the regular arguments must
6133 * (tstIEMCheckMc ensures this).
6134 */
6135DECL_HIDDEN_THROW(uint32_t)
6136iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
6137{
6138#ifdef VBOX_STRICT
6139 /*
6140 * Assert sanity.
6141 */
6142 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
6143 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
6144 for (unsigned i = 0; i < cHiddenArgs; i++)
6145 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
6146 for (unsigned i = cHiddenArgs; i < cArgs; i++)
6147 {
6148 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
6149 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
6150 }
6151#endif
6152
6153 /*
6154 * Before we do anything else, go over variables that are referenced and
6155 * make sure they are not in a register.
6156 */
6157 uint32_t bmVars = pReNative->Core.bmVars;
6158 if (bmVars)
6159 do
6160 {
6161 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
6162 bmVars &= ~RT_BIT_32(idxVar);
6163
6164 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
6165 {
6166 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
6167 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
6168 {
6169 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6170 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
6171 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
6172 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
6173 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
6174
6175 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6176 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
6177 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
6178 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
6179 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
6180 }
6181 }
6182 } while (bmVars != 0);
6183
6184 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
6185
6186 /*
6187 * First, go over the host registers that will be used for arguments and make
6188 * sure they either hold the desired argument or are free.
6189 */
6190 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
6191 for (uint32_t i = 0; i < cRegArgs; i++)
6192 {
6193 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
6194 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
6195 {
6196 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
6197 {
6198 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
6199 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
6200 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
6201 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6202 if (uArgNo == i)
6203 { /* perfect */ }
6204 /* The variable allocator logic should make sure this is impossible,
6205 except for when the return register is used as a parameter (ARM,
6206 but not x86). */
6207#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
6208 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
6209 {
6210# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
6211# error "Implement this"
6212# endif
6213 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
6214 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
6215 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
6216 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
6217 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
6218 }
6219#endif
6220 else
6221 {
6222 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
6223
6224 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6225 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
6226 else
6227 {
6228 /* just free it, can be reloaded if used again */
6229 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6230 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
6231 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
6232 }
6233 }
6234 }
6235 else
6236 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
6237 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
6238 }
6239 }
6240
6241 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
6242
6243#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
6244 /*
6245 * If there are any stack arguments, make sure they are in their place as well.
6246 *
6247 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
6248 * the caller) will be loading it later and it must be free (see the first loop).
6249 */
6250 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
6251 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
6252 {
6253 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6254 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
6255 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6256 {
6257 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
6258 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
6259 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
6260 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6261 }
6262 else
6263 {
6264 /* Use ARG0 as temp for stuff we need registers for. */
6265 switch (pReNative->Core.aVars[idxVar].enmKind)
6266 {
6267 case kIemNativeVarKind_Stack:
6268 {
6269 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6270 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6271 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
6272 iemNativeStackCalcBpDisp(idxStackSlot));
6273 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6274 continue;
6275 }
6276
6277 case kIemNativeVarKind_Immediate:
6278 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
6279 continue;
6280
6281 case kIemNativeVarKind_VarRef:
6282 {
6283 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
6284 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
6285 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
6286 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
6287 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
6288 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
6289 {
6290 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
6291 pReNative->Core.aVars[idxOtherVar].idxReg = UINT8_MAX;
6292 }
6293 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
6294 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
6295 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
6296 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6297 continue;
6298 }
6299
6300 case kIemNativeVarKind_GstRegRef:
6301 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
6302 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
6303 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
6304 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6305 continue;
6306
6307 case kIemNativeVarKind_Invalid:
6308 case kIemNativeVarKind_End:
6309 break;
6310 }
6311 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6312 }
6313 }
6314#else
6315 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
6316#endif
6317
6318 /*
6319 * Make sure the argument variables are loaded into their respective registers.
6320 *
6321 * We can optimize this by ASSUMING that any register allocations are for
6322 * registers that have already been loaded and are ready. The previous step
6323 * saw to that.
6324 */
6325 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
6326 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
6327 {
6328 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
6329 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
6330 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
6331 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
6332 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
6333 else
6334 {
6335 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6336 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6337 {
6338 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6339 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
6340 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
6341 | RT_BIT_32(idxArgReg);
6342 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
6343 }
6344 else
6345 {
6346 /* Use ARG0 as temp for stuff we need registers for. */
6347 switch (pReNative->Core.aVars[idxVar].enmKind)
6348 {
6349 case kIemNativeVarKind_Stack:
6350 {
6351 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6352 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6353 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
6354 continue;
6355 }
6356
6357 case kIemNativeVarKind_Immediate:
6358 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
6359 continue;
6360
6361 case kIemNativeVarKind_VarRef:
6362 {
6363 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
6364 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
6365 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
6366 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
6367 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
6368 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
6369 {
6370 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
6371 pReNative->Core.aVars[idxOtherVar].idxReg = UINT8_MAX;
6372 }
6373 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
6374 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
6375 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
6376 continue;
6377 }
6378
6379 case kIemNativeVarKind_GstRegRef:
6380 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
6381 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
6382 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
6383 continue;
6384
6385 case kIemNativeVarKind_Invalid:
6386 case kIemNativeVarKind_End:
6387 break;
6388 }
6389 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6390 }
6391 }
6392 }
6393#ifdef VBOX_STRICT
6394 else
6395 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
6396 {
6397 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
6398 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
6399 }
6400#endif
6401
6402 /*
6403 * Free all argument variables (simplified).
6404 * Their lifetime always expires with the call they are for.
6405 */
6406 /** @todo Make the python script check that arguments aren't used after
6407 * IEM_MC_CALL_XXXX. */
6408 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
6409 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
6410 * an argument value. There is also some FPU stuff. */
6411 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
6412 {
6413 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6414 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
6415
6416 /* no need to free registers: */
6417 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
6418 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
6419 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
6420 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
6421 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
6422 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
6423
6424 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
6425 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6426 iemNativeVarFreeStackSlots(pReNative, idxVar);
6427 }
6428 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
6429
6430 /*
6431 * Flush volatile registers as we make the call.
6432 */
6433 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
6434
6435 return off;
6436}
6437
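
/*
 * Hypothetical usage sketch (compiled out): cArgs passed to the common worker
 * counts the hidden arguments too, as seen in iemNativeEmitCallCImplCommon below.
 */
#if 0
static uint32_t iemNativeEmitCallCommonUsageSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* A CIMPL call with two explicit arguments; pVCpu/cbInstr (and the strict
       status buffer on some hosts) are the hidden ones. */
    return iemNativeEmitCallCommon(pReNative, off, 2 + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
}
#endif
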
6438
6439/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
6440DECL_HIDDEN_THROW(uint32_t)
6441iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
6442 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
6443
6444{
6445 /*
6446 * Do all the call setup and cleanup.
6447 */
6448 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
6449
6450 /*
6451 * Load the two or three hidden arguments.
6452 */
6453#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6454 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6455 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6456 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
6457#else
6458 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6459 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
6460#endif
6461
6462 /*
6463 * Make the call and check the return code.
6464 *
6465 * Shadow PC copies are always flushed here, other stuff depends on flags.
6466 * Segment and general purpose registers are explicitly flushed via the
6467 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
6468 * macros.
6469 */
6470 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
6471#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6472 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6473#endif
6474 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
6475 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
6476 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6477 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6478
6479 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6480}
6481
6482
6483#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6484 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
6485
6486/** Emits code for IEM_MC_CALL_CIMPL_1. */
6487DECL_INLINE_THROW(uint32_t)
6488iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6489 uintptr_t pfnCImpl, uint8_t idxArg0)
6490{
6491 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6492 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
6493}
6494
6495
6496#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6497 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
6498
6499/** Emits code for IEM_MC_CALL_CIMPL_2. */
6500DECL_INLINE_THROW(uint32_t)
6501iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6502 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
6503{
6504 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6505 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6506 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
6507}
6508
6509
6510#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6511 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6512 (uintptr_t)a_pfnCImpl, a0, a1, a2)
6513
6514/** Emits code for IEM_MC_CALL_CIMPL_3. */
6515DECL_INLINE_THROW(uint32_t)
6516iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6517 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
6518{
6519 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6520 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6521 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6522 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
6523}
6524
6525
6526#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
6527 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6528 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
6529
6530/** Emits code for IEM_MC_CALL_CIMPL_4. */
6531DECL_INLINE_THROW(uint32_t)
6532iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6533 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
6534{
6535 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6536 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6537 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6538 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
6539 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
6540}
6541
6542
6543#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
6544 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6545 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
6546
6547/** Emits code for IEM_MC_CALL_CIMPL_5. */
6548DECL_INLINE_THROW(uint32_t)
6549iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6550 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
6551{
6552 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6553 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6554 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6555 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
6556 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
6557 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
6558}
6559
6560
6561/** Recompiler debugging: Flush guest register shadow copies. */
6562#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
6563
6564
6565
6566/*********************************************************************************************************************************
6567* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
6568*********************************************************************************************************************************/
6569
6570/**
6571 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
6572 */
6573DECL_INLINE_THROW(uint32_t)
6574iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6575 uintptr_t pfnAImpl, uint8_t cArgs)
6576{
6577 if (idxVarRc != UINT8_MAX)
6578 {
6579 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
6580 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
6581 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
6582 }
6583
6584 /*
6585 * Do all the call setup and cleanup.
6586 */
6587 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
6588
6589 /*
6590 * Make the call and update the return code variable if we've got one.
6591 */
6592 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
6593 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
6594 {
6595pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
6596 iemNativeVarSetRegister(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
6597 }
6598
6599 return off;
6600}
6601
6602
6603
6604#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
6605 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
6606
6607#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
6608 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
6609
6610/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
6611DECL_INLINE_THROW(uint32_t)
6612iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
6613{
6614 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
6615}
6616
6617
6618#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
6619 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
6620
6621#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
6622 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
6623
6624/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
6625DECL_INLINE_THROW(uint32_t)
6626iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
6627{
6628 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6629 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
6630}
6631
6632
6633#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
6634 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
6635
6636#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
6637 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
6638
6639/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
6640DECL_INLINE_THROW(uint32_t)
6641iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6642 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
6643{
6644 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6645 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6646 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
6647}
6648
6649
6650#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
6651 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
6652
6653#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
6654 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
6655
6656/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
6657DECL_INLINE_THROW(uint32_t)
6658iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6659 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
6660{
6661 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6662 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6663 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
6664 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
6665}
6666
6667
6668#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
6669 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
6670
6671#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
6672 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
6673
6674/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
6675DECL_INLINE_THROW(uint32_t)
6676iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6677 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
6678{
6679 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6680 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6681 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
6682 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
6683 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
6684}
6685
6686
6687
6688/*********************************************************************************************************************************
6689* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
6690*********************************************************************************************************************************/
6691
6692#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
6693 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx)
6694
6695/** Emits code for IEM_MC_FETCH_GREG_U8. */
6696DECL_INLINE_THROW(uint32_t)
6697iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx)
6698{
6699 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6700 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint8_t));
6701 Assert(iGRegEx < 20);
6702
6703 /* Same discussion as in iemNativeEmitFetchGregU16 */
6704 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
6705 kIemNativeGstRegUse_ReadOnly);
6706
6707 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6708 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6709
6710 if (iGRegEx < 16)
6711 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
6712 else
6713 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
6714
6715 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6716 return off;
6717}
6718
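
/*
 * Illustrative sketch (compiled out): the extended register index encoding the
 * U8 fetch emitter above relies on (0..15 = low byte of RAX..R15,
 * 16..19 = AH, CH, DH, BH).
 */
#if 0
static void iemNativeFetchGregU8IndexSketch(void)
{
    uint8_t const iGRegEx   = 18;            /* hypothetical: DH */
    uint8_t const iGReg     = iGRegEx & 15;  /* -> 2, i.e. RDX */
    bool    const fHighByte = iGRegEx >= 16; /* -> true, i.e. bits 15:8 of the GPR */
    (void)iGReg; (void)fHighByte;
}
#endif
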
6719
6720#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
6721 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
6722
6723/** Emits code for IEM_MC_FETCH_GREG_U16. */
6724DECL_INLINE_THROW(uint32_t)
6725iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
6726{
6727 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6728 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
6729 Assert(iGReg < 16);
6730
6731 /*
6732 * We can either just load the low 16 bits of the GPR into a host register
6733 * for the variable, or we can do so via a shadow copy host register. The
6734 * latter will avoid having to reload it if it's being stored later, but
6735 * will waste a host register if it isn't touched again. Since we don't
6736 * know what's going to happen, we choose the latter for now.
6737 */
6738 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6739 kIemNativeGstRegUse_ReadOnly);
6740
6741 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6742 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6743 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
6744
6745 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6746 return off;
6747}
6748
6749
6750#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
6751 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg)
6752
6753/** Emits code for IEM_MC_FETCH_GREG_U32. */
6754DECL_INLINE_THROW(uint32_t)
6755iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
6756{
6757 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6758 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint32_t));
6759 Assert(iGReg < 16);
6760
6761 /*
6762 * We can either just load the low 32 bits of the GPR into a host register
6763 * for the variable, or we can do so via a shadow copy host register. The
6764 * latter will avoid having to reload it if it's being stored later, but
6765 * will waste a host register if it isn't touched again. Since we don't
6766 * know what's going to happen, we choose the latter for now.
6767 */
6768 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6769 kIemNativeGstRegUse_ReadOnly);
6770
6771 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6772 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6773 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
6774
6775 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6776 return off;
6777}
6778
6779
6780
6781/*********************************************************************************************************************************
6782* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
6783*********************************************************************************************************************************/
6784
6785#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
6786 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
6787
6788/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
6789DECL_INLINE_THROW(uint32_t)
6790iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
6791{
6792 Assert(iGRegEx < 20);
6793 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
6794 kIemNativeGstRegUse_ForUpdate);
6795#ifdef RT_ARCH_AMD64
6796 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6797
6798 /* To the lowest byte of the register: mov r8, imm8 */
6799 if (iGRegEx < 16)
6800 {
6801 if (idxGstTmpReg >= 8)
6802 pbCodeBuf[off++] = X86_OP_REX_B;
6803 else if (idxGstTmpReg >= 4)
6804 pbCodeBuf[off++] = X86_OP_REX;
6805 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
6806 pbCodeBuf[off++] = u8Value;
6807 }
6808 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise we rotate. */
6809 else if (idxGstTmpReg < 4)
6810 {
6811 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
6812 pbCodeBuf[off++] = u8Value;
6813 }
6814 else
6815 {
6816 /* ror reg64, 8 */
6817 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
6818 pbCodeBuf[off++] = 0xc1;
6819 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6820 pbCodeBuf[off++] = 8;
6821
6822 /* mov reg8, imm8 */
6823 if (idxGstTmpReg >= 8)
6824 pbCodeBuf[off++] = X86_OP_REX_B;
6825 else if (idxGstTmpReg >= 4)
6826 pbCodeBuf[off++] = X86_OP_REX;
6827 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
6828 pbCodeBuf[off++] = u8Value;
6829
6830 /* rol reg64, 8 */
6831 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
6832 pbCodeBuf[off++] = 0xc1;
6833 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
6834 pbCodeBuf[off++] = 8;
6835 }
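    /* Note: the ror/rol dance above is needed because only host registers 0..3 have
       AH/CH/DH/BH style high-byte encodings, and those cannot be combined with a REX
       prefix (with REX present, encodings 4..7 select SPL/BPL/SIL/DIL instead).
       Rotating the 64-bit register right by 8 brings the target byte into the low
       byte where it can be written directly; the rol restores the original layout. */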
6836
6837#elif defined(RT_ARCH_ARM64)
6838 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
6839 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6840 if (iGRegEx < 16)
6841 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
6842 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
6843 else
6844 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
6845 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
6846 iemNativeRegFreeTmp(pReNative, idxImmReg);
6847
6848#else
6849# error "Port me!"
6850#endif
6851
6852 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6853
6854 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
6855
6856 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6857 return off;
6858}
6859
6860
6861#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
6862 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
6863
6864/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
6865DECL_INLINE_THROW(uint32_t)
6866iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
6867{
6868 Assert(iGReg < 16);
6869 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6870 kIemNativeGstRegUse_ForUpdate);
6871#ifdef RT_ARCH_AMD64
6872 /* mov reg16, imm16 */
6873 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6874 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6875 if (idxGstTmpReg >= 8)
6876 pbCodeBuf[off++] = X86_OP_REX_B;
6877 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
6878 pbCodeBuf[off++] = RT_BYTE1(uValue);
6879 pbCodeBuf[off++] = RT_BYTE2(uValue);
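    /* Example of the bytes emitted above: storing 0x1234 into host register 10 (r10w)
       yields 66 41 BA 34 12, i.e. "mov r10w, 0x1234". */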
6880
6881#elif defined(RT_ARCH_ARM64)
6882 /* movk xdst, #uValue, lsl #0 */
6883 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6884 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
6885
6886#else
6887# error "Port me!"
6888#endif
6889
6890 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6891
6892 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6893 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6894 return off;
6895}
6896
6897
6898#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
6899 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
6900
6901/** Emits code for IEM_MC_STORE_GREG_U16. */
6902DECL_INLINE_THROW(uint32_t)
6903iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6904{
6905 Assert(iGReg < 16);
6906 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6907
6908 /*
6909 * If it's a constant value (unlikely) we treat this as an
6910 * IEM_MC_STORE_GREG_U16_CONST statement.
6911 */
6912 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6913 { /* likely */ }
6914 else
6915 {
6916 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
6917 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6918 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
6919 }
6920
6921 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6922 kIemNativeGstRegUse_ForUpdate);
6923
6924#ifdef RT_ARCH_AMD64
6925 /* mov reg16, reg16 or [mem16] */
6926 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6927 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6928 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6929 {
6930 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
6931 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
6932 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
6933 pbCodeBuf[off++] = 0x8b;
6934 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
6935 }
6936 else
6937 {
6938 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
6939 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6940 if (idxGstTmpReg >= 8)
6941 pbCodeBuf[off++] = X86_OP_REX_R;
6942 pbCodeBuf[off++] = 0x8b;
6943 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
6944 }
6945
6946#elif defined(RT_ARCH_ARM64)
6947 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
6948 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off, true /*fInitialized*/);
6949 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6950 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
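    /* The BFI only merges bits 15:0 into the guest register copy, matching the x86
       rule that a 16-bit GPR write leaves bits 63:16 untouched. */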
6951
6952#else
6953# error "Port me!"
6954#endif
6955
6956 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6957
6958 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6959 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6960 return off;
6961}
6962
6963
6964#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
6965 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
6966
6967/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
6968DECL_INLINE_THROW(uint32_t)
6969iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
6970{
6971 Assert(iGReg < 16);
6972 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6973 kIemNativeGstRegUse_ForFullWrite);
6974 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
6975 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6976 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6977 return off;
6978}
6979
6980
6981#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
6982 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
6983
6984/** Emits code for IEM_MC_STORE_GREG_U32. */
6985DECL_INLINE_THROW(uint32_t)
6986iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6987{
6988 Assert(iGReg < 16);
6989 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6990
6991 /*
6992 * If it's a constant value (unlikely) we treat this as an
6993 * IEM_MC_STORE_GREG_U32_CONST statement.
6994 */
6995 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6996 { /* likely */ }
6997 else
6998 {
6999 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
7000 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7001 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
7002 }
7003
7004 /*
7005 * For the rest we allocate a guest register for the variable and write
7006 * it to the CPUMCTX structure.
7007 */
7008 uint8_t const idxVarReg = iemNativeVarAllocRegisterForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
7009 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7010#ifdef VBOX_STRICT
7011 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
7012#endif
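    /* The strict-build check above verifies that bits 63:32 of the variable's host
       register are already clear, since the full 64-bit store relies on that to get
       the x86 zero-extension semantics of a 32-bit GPR write right. */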
7013 return off;
7014}
7015
7016
7017
7018#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
7019 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
7020
7021/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
7022DECL_INLINE_THROW(uint32_t)
7023iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
7024{
7025 Assert(iGReg < 16);
7026 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7027 kIemNativeGstRegUse_ForUpdate);
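    /* A 32-bit register-to-register move zero-extends into bits 63:32 on both AMD64
       (mov r32, r32) and ARM64 (mov w, w), so copying the register onto itself as a
       32-bit value is all that is needed to clear the high half. */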
7028 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
7029 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7030 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7031 return off;
7032}
7033
7034
7035/*********************************************************************************************************************************
7036* General purpose register manipulation (add, sub). *
7037*********************************************************************************************************************************/
7038
7039#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
7040 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
7041
7042/** Emits code for IEM_MC_SUB_GREG_U16. */
7043DECL_INLINE_THROW(uint32_t)
7044iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
7045{
7046 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7047 kIemNativeGstRegUse_ForUpdate);
7048
7049#ifdef RT_ARCH_AMD64
7050 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7051 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7052 if (idxGstTmpReg >= 8)
7053 pbCodeBuf[off++] = X86_OP_REX_B;
7054 if (uSubtrahend == 1)
7055 {
7056 pbCodeBuf[off++] = 0xff; /* dec */
7057 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
7058 }
7059 else
7060 {
7061 pbCodeBuf[off++] = 0x81;
7062 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
7063 pbCodeBuf[off++] = uSubtrahend;
7064 pbCodeBuf[off++] = 0;
7065 }
7066
7067#else
7068 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7069 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7070
7071 /* sub tmp, gstgrp, uSubtrahend */
7072 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
7073
7074 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
7075 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
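    /* Only bits 15:0 are merged back into the guest register copy; bits 63:16 must
       survive a 16-bit subtraction just like they do on x86. */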
7076
7077 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7078#endif
7079
7080 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7081
7082 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7083
7084 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7085 return off;
7086}
7087
7088
7089#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
7090 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
7091
7092#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
7093 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
7094
7095/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
7096DECL_INLINE_THROW(uint32_t)
7097iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
7098{
7099 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
7100 kIemNativeGstRegUse_ForUpdate);
7101
7102#ifdef RT_ARCH_AMD64
7103 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7104 if (f64Bit)
7105 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
7106 else if (idxGstTmpReg >= 8)
7107 pbCodeBuf[off++] = X86_OP_REX_B;
7108 if (uSubtrahend == 1)
7109 {
7110 /* dec */
7111 pbCodeBuf[off++] = 0xff;
7112 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
7113 }
7114 else if (uSubtrahend < 128)
7115 {
7116 pbCodeBuf[off++] = 0x83; /* sub */
7117 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
7118 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
7119 }
7120 else
7121 {
7122 pbCodeBuf[off++] = 0x81; /* sub */
7123 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
7124 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
7125 pbCodeBuf[off++] = 0;
7126 pbCodeBuf[off++] = 0;
7127 pbCodeBuf[off++] = 0;
7128 }
7129
7130#else
7131 /* sub gstgrp, gstgrp, uSubtrahend */
7132 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7133 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
7134
7135#endif
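    /* No masking is needed here: with f64Bit clear, both the AMD64 32-bit sub and the
       ARM64 32-bit form zero-extend the result, which matches the x86 rule that a
       32-bit GPR write clears bits 63:32. */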
7136
7137 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7138
7139 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
7140
7141 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
7142 return off;
7143}
7144
7145
7146
7147/*********************************************************************************************************************************
7148* Register references. *
7149*********************************************************************************************************************************/
7150
7151#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
7152 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
7153
7154#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
7155 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
7156
7157/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
7158DECL_INLINE_THROW(uint32_t)
7159iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
7160{
7161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
7162 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
7163 Assert(iGRegEx < 20);
7164
7165 if (iGRegEx < 16)
7166 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
7167 else
7168 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
7169
7170 /* If we've delayed writing back the register value, flush it now. */
7171 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
7172
7173 /* If it's not a const reference we need to flush the shadow copy of the register now. */
7174 if (!fConst)
7175 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
7176
7177 return off;
7178}
7179
7180#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
7181 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
7182
7183#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
7184 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
7185
7186#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
7187 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
7188
7189#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
7190 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
7191
7192#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
7193 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
7194
7195#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
7196 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
7197
7198#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
7199 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
7200
7201#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
7202 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
7203
7204#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
7205 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
7206
7207#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
7208 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
7209
7210/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
7211DECL_INLINE_THROW(uint32_t)
7212iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
7213{
7214 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
7215 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
7216 Assert(iGReg < 16);
7217
7218 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
7219
7220 /* If we've delayed writing back the register value, flush it now. */
7221 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
7222
7223 /* If it's not a const reference we need to flush the shadow copy of the register now. */
7224 if (!fConst)
7225 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
7226
7227 return off;
7228}
7229
7230
7231#define IEM_MC_REF_EFLAGS(a_pEFlags) \
7232 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
7233
7234/** Handles IEM_MC_REF_EFLAGS. */
7235DECL_INLINE_THROW(uint32_t)
7236iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
7237{
7238 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
7239 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
7240
7241 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
7242
7243 /* If we've delayed writing back the register value, flush it now. */
7244 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
7245
7246 /* If there is a shadow copy of guest EFLAGS, flush it now. */
7247 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
7248
7249 return off;
7250}
7251
7252
7253/*********************************************************************************************************************************
7254* Effective Address Calculation *
7255*********************************************************************************************************************************/
7256#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
7257 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
7258
7259/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
7260 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
7261DECL_INLINE_THROW(uint32_t)
7262iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7263 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
7264{
7265 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
7266
7267 /*
7268 * Handle the disp16 form with no registers first.
7269 *
7270 * Convert to an immediate value, as that'll delay the register allocation
7271 * and assignment till the memory access / call / whatever and we can use
7272 * a more appropriate register (or none at all).
7273 */
7274 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
7275 {
7276 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
7277 return off;
7278 }
7279
7280 /* Determine the displacement. */
7281 uint16_t u16EffAddr;
7282 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
7283 {
7284 case 0: u16EffAddr = 0; break;
7285 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
7286 case 2: u16EffAddr = u16Disp; break;
7287 default: AssertFailedStmt(u16EffAddr = 0);
7288 }
7289
7290 /* Determine the registers involved. */
7291 uint8_t idxGstRegBase;
7292 uint8_t idxGstRegIndex;
7293 switch (bRm & X86_MODRM_RM_MASK)
7294 {
7295 case 0:
7296 idxGstRegBase = X86_GREG_xBX;
7297 idxGstRegIndex = X86_GREG_xSI;
7298 break;
7299 case 1:
7300 idxGstRegBase = X86_GREG_xBX;
7301 idxGstRegIndex = X86_GREG_xDI;
7302 break;
7303 case 2:
7304 idxGstRegBase = X86_GREG_xBP;
7305 idxGstRegIndex = X86_GREG_xSI;
7306 break;
7307 case 3:
7308 idxGstRegBase = X86_GREG_xBP;
7309 idxGstRegIndex = X86_GREG_xDI;
7310 break;
7311 case 4:
7312 idxGstRegBase = X86_GREG_xSI;
7313 idxGstRegIndex = UINT8_MAX;
7314 break;
7315 case 5:
7316 idxGstRegBase = X86_GREG_xDI;
7317 idxGstRegIndex = UINT8_MAX;
7318 break;
7319 case 6:
7320 idxGstRegBase = X86_GREG_xBP;
7321 idxGstRegIndex = UINT8_MAX;
7322 break;
7323#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
7324 default:
7325#endif
7326 case 7:
7327 idxGstRegBase = X86_GREG_xBX;
7328 idxGstRegIndex = UINT8_MAX;
7329 break;
7330 }
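    /* This mirrors the standard 16-bit ModR/M table:
       rm=0: BX+SI  rm=1: BX+DI  rm=2: BP+SI  rm=3: BP+DI
       rm=4: SI  rm=5: DI  rm=6: BP (disp16-only when mod=0, handled above)  rm=7: BX */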
7331
7332 /*
7333 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
7334 */
7335 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
7336 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
7337 kIemNativeGstRegUse_ReadOnly);
7338 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
7339 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
7340 kIemNativeGstRegUse_ReadOnly)
7341 : UINT8_MAX;
7342#ifdef RT_ARCH_AMD64
7343 if (idxRegIndex == UINT8_MAX)
7344 {
7345 if (u16EffAddr == 0)
7346 {
7347 /* movzx ret, base */
7348 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
7349 }
7350 else
7351 {
7352 /* lea ret32, [base64 + disp32] */
7353 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
7354 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7355 if (idxRegRet >= 8 || idxRegBase >= 8)
7356 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
7357 pbCodeBuf[off++] = 0x8d;
7358 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7359 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
7360 else
7361 {
7362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
7363 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7364 }
7365 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
7366 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
7367 pbCodeBuf[off++] = 0;
7368 pbCodeBuf[off++] = 0;
7369 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7370
7371 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
7372 }
7373 }
7374 else
7375 {
7376 /* lea ret32, [index64 + base64 (+ disp32)] */
7377 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7378 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7379 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7380 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7381 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7382 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
7383 pbCodeBuf[off++] = 0x8d;
7384 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
7385 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7386 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
7387 if (bMod == X86_MOD_MEM4)
7388 {
7389 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
7390 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
7391 pbCodeBuf[off++] = 0;
7392 pbCodeBuf[off++] = 0;
7393 }
7394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7395 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
7396 }
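    /* Both LEA paths finish with iemNativeEmitClear16UpGpr because a 16-bit effective
       address must wrap at 64KiB, while the 32-bit LEA result may carry into bit 16. */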
7397
7398#elif defined(RT_ARCH_ARM64)
7399 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7400 if (u16EffAddr == 0)
7401 {
7402 if (idxRegIndex == UINT8_MAX)
7403 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
7404 else
7405 {
7406 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
7407 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
7408 }
7409 }
7410 else
7411 {
7412 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
7413 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
7414 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
7415 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
7416 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
7417 else
7418 {
7419 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
7420 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
7421 }
7422 if (idxRegIndex != UINT8_MAX)
7423 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
7424 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
7425 }
7426
7427#else
7428# error "port me"
7429#endif
7430
7431 if (idxRegIndex != UINT8_MAX)
7432 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7433 iemNativeRegFreeTmp(pReNative, idxRegBase);
7434 return off;
7435}
7436
7437
7438#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
7439 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
7440
7441/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
7442 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
7443DECL_INLINE_THROW(uint32_t)
7444iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7445 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
7446{
7447 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
7448
7449 /*
7450 * Handle the disp32 form with no registers first.
7451 *
7452 * Convert to an immediate value, as that'll delay the register allocation
7453 * and assignment till the memory access / call / whatever and we can use
7454 * a more appropriate register (or none at all).
7455 */
7456 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
7457 {
7458 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
7459 return off;
7460 }
7461
7462 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
7463 uint32_t u32EffAddr = 0;
7464 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
7465 {
7466 case 0: break;
7467 case 1: u32EffAddr = (int8_t)u32Disp; break;
7468 case 2: u32EffAddr = u32Disp; break;
7469 default: AssertFailed();
7470 }
7471
7472 /* Get the register (or SIB) value. */
7473 uint8_t idxGstRegBase = UINT8_MAX;
7474 uint8_t idxGstRegIndex = UINT8_MAX;
7475 uint8_t cShiftIndex = 0;
7476 switch (bRm & X86_MODRM_RM_MASK)
7477 {
7478 case 0: idxGstRegBase = X86_GREG_xAX; break;
7479 case 1: idxGstRegBase = X86_GREG_xCX; break;
7480 case 2: idxGstRegBase = X86_GREG_xDX; break;
7481 case 3: idxGstRegBase = X86_GREG_xBX; break;
7482 case 4: /* SIB */
7483 {
7484 /* index w/ scaling. */
7485 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
7486 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
7487 {
7488 case 0: idxGstRegIndex = X86_GREG_xAX; break;
7489 case 1: idxGstRegIndex = X86_GREG_xCX; break;
7490 case 2: idxGstRegIndex = X86_GREG_xDX; break;
7491 case 3: idxGstRegIndex = X86_GREG_xBX; break;
7492 case 4: cShiftIndex = 0; /*no index*/ break;
7493 case 5: idxGstRegIndex = X86_GREG_xBP; break;
7494 case 6: idxGstRegIndex = X86_GREG_xSI; break;
7495 case 7: idxGstRegIndex = X86_GREG_xDI; break;
7496 }
7497
7498 /* base */
7499 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
7500 {
7501 case 0: idxGstRegBase = X86_GREG_xAX; break;
7502 case 1: idxGstRegBase = X86_GREG_xCX; break;
7503 case 2: idxGstRegBase = X86_GREG_xDX; break;
7504 case 3: idxGstRegBase = X86_GREG_xBX; break;
7505 case 4:
7506 idxGstRegBase = X86_GREG_xSP;
7507 u32EffAddr += uSibAndRspOffset >> 8;
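    /* Bits 8 and up of uSibAndRspOffset presumably carry the extra RSP/ESP
       displacement supplied by the threaded decoder for xSP-based accesses
       (compensating for stack adjustments earlier in the instruction). */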
7508 break;
7509 case 5:
7510 if ((bRm & X86_MODRM_MOD_MASK) != 0)
7511 idxGstRegBase = X86_GREG_xBP;
7512 else
7513 {
7514 Assert(u32EffAddr == 0);
7515 u32EffAddr = u32Disp;
7516 }
7517 break;
7518 case 6: idxGstRegBase = X86_GREG_xSI; break;
7519 case 7: idxGstRegBase = X86_GREG_xDI; break;
7520 }
7521 break;
7522 }
7523 case 5: idxGstRegBase = X86_GREG_xBP; break;
7524 case 6: idxGstRegBase = X86_GREG_xSI; break;
7525 case 7: idxGstRegBase = X86_GREG_xDI; break;
7526 }
7527
7528 /*
7529 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
7530 * the start of the function.
7531 */
7532 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
7533 {
7534 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
7535 return off;
7536 }
7537
7538 /*
7539 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
7540 */
7541 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
7542 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
7543 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
7544 kIemNativeGstRegUse_ReadOnly);
7545 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
7546 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
7547 kIemNativeGstRegUse_ReadOnly);
7548
7549 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
7550 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
7551 {
7552 idxRegBase = idxRegIndex;
7553 idxRegIndex = UINT8_MAX;
7554 }
7555
7556#ifdef RT_ARCH_AMD64
7557 if (idxRegIndex == UINT8_MAX)
7558 {
7559 if (u32EffAddr == 0)
7560 {
7561 /* mov ret, base */
7562 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
7563 }
7564 else
7565 {
7566 /* lea ret32, [base64 + disp32] */
7567 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
7568 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7569 if (idxRegRet >= 8 || idxRegBase >= 8)
7570 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
7571 pbCodeBuf[off++] = 0x8d;
7572 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7573 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7574 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7575 else
7576 {
7577 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7578 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7579 }
7580 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7581 if (bMod == X86_MOD_MEM4)
7582 {
7583 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7584 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7585 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7586 }
7587 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7588 }
7589 }
7590 else
7591 {
7592 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7593 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7594 if (idxRegBase == UINT8_MAX)
7595 {
7596 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
7597 if (idxRegRet >= 8 || idxRegIndex >= 8)
7598 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7599 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
7600 pbCodeBuf[off++] = 0x8d;
7601 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7602 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7603 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7604 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7605 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7606 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7607 }
7608 else
7609 {
7610 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7611 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7612 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7613 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7614 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
7615 pbCodeBuf[off++] = 0x8d;
7616 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7617 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7618 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7619 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7620 if (bMod != X86_MOD_MEM0)
7621 {
7622 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7623 if (bMod == X86_MOD_MEM4)
7624 {
7625 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7626 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7627 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7628 }
7629 }
7630 }
7631 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7632 }
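    /* bMod selects between X86_MOD_MEM0 (no displacement), X86_MOD_MEM1 (sign-extended
       disp8) and X86_MOD_MEM4 (disp32); MEM0 is only usable when the base encoding is
       not xBP/r13, which always requires a displacement. */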
7633
7634#elif defined(RT_ARCH_ARM64)
7635 if (u32EffAddr == 0)
7636 {
7637 if (idxRegIndex == UINT8_MAX)
7638 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
7639 else if (idxRegBase == UINT8_MAX)
7640 {
7641 if (cShiftIndex == 0)
7642 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
7643 else
7644 {
7645 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7646 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
7647 }
7648 }
7649 else
7650 {
7651 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7652 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7653 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
7654 }
7655 }
7656 else
7657 {
7658 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
7659 {
7660 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7661 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
7662 }
7663 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
7664 {
7665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7666 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
7667 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
7668 }
7669 else
7670 {
7671 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
7672 if (idxRegBase != UINT8_MAX)
7673 {
7674 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7675 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
7676 }
7677 }
7678 if (idxRegIndex != UINT8_MAX)
7679 {
7680 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7681 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7682 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
7683 }
7684 }
7685
7686#else
7687# error "port me"
7688#endif
7689
7690 if (idxRegIndex != UINT8_MAX)
7691 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7692 if (idxRegBase != UINT8_MAX)
7693 iemNativeRegFreeTmp(pReNative, idxRegBase);
7694 return off;
7695}
7696
7697
7698#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7699 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff)
7700
7701#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7702 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 64)
7703
7704#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7705 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 32)
7706
7707
7708
7709/*********************************************************************************************************************************
7710* Memory fetches and stores common *
7711*********************************************************************************************************************************/
7712
7713/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7714 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7715 * (with iSegReg = UINT8_MAX). */
7716DECL_INLINE_THROW(uint32_t)
7717iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7718 uint8_t idxVarGCPtrMem, uint8_t cbMem, bool fFetch, uintptr_t pfnFunction, uint8_t idxInstr,
7719 uint8_t offDisp = 0)
7720{
7721 /*
7722 * Assert sanity.
7723 */
7724 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7725 Assert( fFetch
7726 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
7727 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
7728 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7729 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
7730 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
7731 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7732 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7733 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7734 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7735#ifdef VBOX_STRICT
7736 if ( ( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7737 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7738 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT)
7739 && ( iSegReg == UINT8_MAX
7740 || iSegReg == X86_SREG_DS
7741 || iSegReg == X86_SREG_ES
7742 || iSegReg == X86_SREG_SS
7743 || (iSegReg == X86_SREG_CS && (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT) ))
7744 {
7745 Assert(iSegReg == UINT8_MAX);
7746 switch (cbMem)
7747 {
7748 case 1: Assert(pfnFunction == (fFetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8 : (uintptr_t)iemNativeHlpMemFlatStoreDataU8 )); break;
7749 case 2: Assert(pfnFunction == (fFetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16 : (uintptr_t)iemNativeHlpMemFlatStoreDataU16)); break;
7750 case 4: Assert(pfnFunction == (fFetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32 : (uintptr_t)iemNativeHlpMemFlatStoreDataU32)); break;
7751 case 8: Assert(pfnFunction == (fFetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64 : (uintptr_t)iemNativeHlpMemFlatStoreDataU64)); break;
7752 }
7753 }
7754 else
7755 {
7756 Assert(iSegReg < 6);
7757 switch (cbMem)
7758 {
7759 case 1: Assert(pfnFunction == (fFetch ? (uintptr_t)iemNativeHlpMemFetchDataU8 : (uintptr_t)iemNativeHlpMemStoreDataU8 )); break;
7760 case 2: Assert(pfnFunction == (fFetch ? (uintptr_t)iemNativeHlpMemFetchDataU16 : (uintptr_t)iemNativeHlpMemStoreDataU16)); break;
7761 case 4: Assert(pfnFunction == (fFetch ? (uintptr_t)iemNativeHlpMemFetchDataU32 : (uintptr_t)iemNativeHlpMemStoreDataU32)); break;
7762 case 8: Assert(pfnFunction == (fFetch ? (uintptr_t)iemNativeHlpMemFetchDataU64 : (uintptr_t)iemNativeHlpMemStoreDataU64)); break;
7763 }
7764 }
7765#endif
7766
7767
7768#ifdef VBOX_STRICT
7769 /*
7770 * Check that the fExec flags we've got make sense.
7771 */
7772 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7773#endif
7774
7775 /*
7776 * To keep things simple we have to commit any pending writes first as we
7777 * may end up making calls.
7778 */
7779 /** @todo we could postpone this till we make the call and reload the
7780 * registers after returning from the call. Not sure if that's sensible or
7781 * not, though. */
7782 off = iemNativeRegFlushPendingWrites(pReNative, off);
7783
7784 /*
7785 * Move/spill/flush stuff out of call-volatile registers.
7786 * This is the easy way out. We could contain this to the tlb-miss branch
7787 * by saving and restoring active stuff here.
7788 */
7789 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7790 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7791
7792 /*
7793 * Define labels and allocate the result register (trying for the return
7794 * register if we can).
7795 */
7796 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7797 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7798 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7799 uint8_t const idxRegValueFetch = !fFetch ? UINT8_MAX /* special case value storing below */
7800 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7801 ? iemNativeVarSetRegister(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, off)
7802 : iemNativeVarAllocRegister(pReNative, idxVarValue, &off);
7803
7804 /*
7805 * First we try to go via the TLB.
7806 */
7807//pReNative->pInstrBuf[off++] = 0xcc;
7808 /** @todo later. */
7809
7810 /*
7811 * Call helper to do the fetching.
7812 * We flush all guest register shadow copies here.
7813 */
7814 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7815
7816#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7817 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7818#else
7819 RT_NOREF(idxInstr);
7820#endif
7821
7822 uint8_t idxRegArgValue;
7823 if (iSegReg == UINT8_MAX)
7824 idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
7825 else
7826 {
7827 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7828 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7829 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7830
7831 idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
7832 }
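    /* Helper call argument layout, as marshalled here and below (uValue only for stores):
       flat variants: (pVCpu, GCPtrMem[, uValue]); segmented: (pVCpu, GCPtrMem, iSegReg[, uValue]). */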
7833
7834 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7835 if (!fFetch)
7836 {
7837 if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
7838 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
7839 else
7840 {
7841 uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
7842 if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
7843 {
7844 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
7845 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
7846 }
7847 else
7848 {
7849 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
7850 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7851 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
7852 }
7853 }
7854 }
7855
7856 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7857 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
7858 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
7859 pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
7860 else
7861 {
7862 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
7863 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
7864 {
7865 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
7866 if (!offDisp)
7867 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
7868 else
7869 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
7870 }
7871 else
7872 {
7873 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
7874 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7875 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
7876 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
7877 if (offDisp)
7878 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
7879 }
7880 }
7881
7882 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7883 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7884
7885 /* Done setting up parameters, make the call. */
7886 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7887
7888 /*
7889 * Put the result in the right register if this is a fetch.
7890 */
7891 if (fFetch)
7892 {
7893 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
7894 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7895 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7896 }
7897
7898 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7899
7900 return off;
7901}
7902
7903
7904
7905/*********************************************************************************************************************************
7906* Memory fetches (IEM_MEM_FETCH_XXX). *
7907*********************************************************************************************************************************/
7908
7909#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7910 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, sizeof(uint8_t), true /*fFetch*/, \
7911 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7912
7913#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7914 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, sizeof(uint16_t), true /*fFetch*/, \
7915 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7916
7917#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7918 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, sizeof(uint16_t), true /*fFetch*/, \
7919 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7920
7921#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7922 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, sizeof(uint32_t), true /*fFetch*/, \
7923 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7924
7925#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, sizeof(uint32_t), true /*fFetch*/, \
7927 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7928
7929#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7930 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, sizeof(uint64_t), true /*fFetch*/, \
7931 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7932
7933
7934#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7935 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), true /*fFetch*/, \
7936 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7937
7938#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7939 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), true /*fFetch*/, \
7940 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7941
7942#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), true /*fFetch*/, \
7944 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7945
7946#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7947 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), true /*fFetch*/, \
7948 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7949
7950
7951
7952/*********************************************************************************************************************************
7953* Memory stores (IEM_MEM_STORE_XXX). *
7954*********************************************************************************************************************************/
7955
7956#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7957 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, sizeof(uint8_t), false /*fFetch*/, \
7958 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7959
7960#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7961 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, sizeof(uint16_t), false /*fFetch*/, \
7962 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7963
7964#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7965 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, sizeof(uint32_t), false /*fFetch*/, \
7966 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7967
7968#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7969 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, sizeof(uint64_t), false /*fFetch*/, \
7970 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7971
7972
7973#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7974 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), false /*fFetch*/, \
7975 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7976
7977#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7978 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), false /*fFetch*/, \
7979 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7980
7981#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7982 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), false /*fFetch*/, \
7983 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7984
7985#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7986 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), false /*fFetch*/, \
7987 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7988
7989
7990
7991/*********************************************************************************************************************************
7992* Builtin functions *
7993*********************************************************************************************************************************/
7994
7995/**
7996 * Built-in function that calls a C-implementation function taking zero arguments.
7997 */
7998static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
7999{
8000 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
8001 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
8002 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
8003 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
8004}
8005
8006
8007/**
8008 * Built-in function that checks for pending interrupts that can be delivered or
8009 * forced action flags.
8010 *
8011 * This triggers after the completion of an instruction, so EIP is already at
8012 * the next instruction. If an IRQ or important FF is pending, this will return
8013 * a non-zero status that stops TB execution.
8014 */
8015static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
8016{
8017 RT_NOREF(pCallEntry);
8018
8019 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
8020 and I'm too lazy to create a 'Fixed' version of that one. */
8021 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
8022 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
8023
8024 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
8025
8026 /* Again, we need to load the extended EFLAGS before we actually need them
8027 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
8028 loaded them inside the check, as the shadow state would not be correct
8029 when the code branches before the load. Ditto PC. */
8030 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8031 kIemNativeGstRegUse_ReadOnly);
8032
8033 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
8034
8035 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8036
8037 /*
8038 * Start by checking the local forced actions of the EMT we're on for IRQs
8039 * and other FFs that need servicing.
8040 */
8041 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
8042 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
8043 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
8044 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
8045 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
8046 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
8047 | VMCPU_FF_TLB_FLUSH
8048 | VMCPU_FF_UNHALT ),
8049 true /*fSetFlags*/);
8050 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
8051 uint32_t const offFixupJumpToVmCheck1 = off;
8052 off = iemNativeEmitJzToFixed(pReNative, off, 0);
8053
8054 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
8055 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
8056 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
8057 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
8058 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
8059 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
8060
8061 /* So, it's only interrupt related FFs and we need to see if IRQs are being
8062 suppressed by the CPU or not. */
8063 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
8064 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
8065 idxLabelReturnBreak);
8066
8067 /* We've got shadow flags set, so we must check that the PC they are valid
8068 for matches our current PC value. */
8069 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
8070 * a register. */
8071 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
8072 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
8073
8074 /*
8075 * Now check the force flags of the VM.
8076 */
8077 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
8078 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
8079 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
8080 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
8081 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
8082 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
8083
8084 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
8085
8086 /*
8087 * We're good, no IRQs or FFs pending.
8088 */
8089 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8090 iemNativeRegFreeTmp(pReNative, idxEflReg);
8091 iemNativeRegFreeTmp(pReNative, idxPcReg);
8092
8093 return off;
8094}
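
/* Rough C equivalent of the IRQ/FF check emitted by the function above.  This is a
   sketch for illustration only: fFFs, fEFlags, uPc and uRipInhibitInt are illustrative
   local names, not the actual shadowed registers the generated code operates on.

       uint64_t fFFs = pVCpu->fLocalForcedActions
                     & (VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
                                              | VMCPU_FF_TLB_FLUSH    | VMCPU_FF_UNHALT));
       if (fFFs)
       {
           if (fFFs & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
               return VINF_IEM_REEXEC_BREAK;               // non-interrupt FFs pending
           if (fEFlags & X86_EFL_IF)                       // IRQs enabled by the guest...
               if (   !(fEFlags & CPUMCTX_INHIBIT_SHADOW)  // ...and not inhibited...
                   || uRipInhibitInt != uPc)               // ...or the inhibition no longer applies
                   return VINF_IEM_REEXEC_BREAK;
       }
       if (pVM->fGlobalForcedActions & VM_FF_ALL_MASK)
           return VINF_IEM_REEXEC_BREAK;
 */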
8095
8096
8097/**
8098 * Built-in function that checks if IEMCPU::fExec has the expected value.
8099 */
8100static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
8101{
8102 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
8103 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8104
8105 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
8106 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
8107 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
8108 kIemNativeLabelType_ReturnBreak);
8109 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8110 return off;
8111}
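
/* The code emitted above boils down to roughly this (sketch; fExec is IEMCPU::fExec):
       if ((fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
           return VINF_IEM_REEXEC_BREAK;
 */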
8112
8113
8114
8115/*********************************************************************************************************************************
8116* The native code generator functions for each MC block. *
8117*********************************************************************************************************************************/
8118
8119
8120/*
8121 * Include g_apfnIemNativeRecompileFunctions and associated functions.
8122 *
8123 * This should probably live in its own file later, but let's see what the
8124 * compile times turn out to be first.
8125 */
8126#include "IEMNativeFunctions.cpp.h"
8127
8128
8129
8130/*********************************************************************************************************************************
8131* Recompiler Core. *
8132*********************************************************************************************************************************/
8133
8134
8135/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8136static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8137{
8138 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8139 pDis->cbCachedInstr += cbMaxRead;
8140 RT_NOREF(cbMinRead);
8141 return VERR_NO_DATA;
8142}
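
/* Note: the dummy above is handed to DISInstrWithPrefetchedBytes() below so that the
   disassembler never reads beyond the prefetched opcode bytes; anything it asks for
   outside them is zero-filled and flagged with VERR_NO_DATA. */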
8143
8144
8145/**
8146 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
8147 * @returns pszBuf.
8148 * @param fFlags The flags.
8149 * @param pszBuf The output buffer.
8150 * @param cbBuf The output buffer size. At least 32 bytes.
8151 */
8152DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
8153{
8154 Assert(cbBuf >= 32);
8155 static RTSTRTUPLE const s_aModes[] =
8156 {
8157 /* [00] = */ { RT_STR_TUPLE("16BIT") },
8158 /* [01] = */ { RT_STR_TUPLE("32BIT") },
8159 /* [02] = */ { RT_STR_TUPLE("!2!") },
8160 /* [03] = */ { RT_STR_TUPLE("!3!") },
8161 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
8162 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
8163 /* [06] = */ { RT_STR_TUPLE("!6!") },
8164 /* [07] = */ { RT_STR_TUPLE("!7!") },
8165 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
8166 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
8167 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
8168 /* [0b] = */ { RT_STR_TUPLE("!b!") },
8169 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
8170 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
8171 /* [0e] = */ { RT_STR_TUPLE("!e!") },
8172 /* [0f] = */ { RT_STR_TUPLE("!f!") },
8173 /* [10] = */ { RT_STR_TUPLE("!10!") },
8174 /* [11] = */ { RT_STR_TUPLE("!11!") },
8175 /* [12] = */ { RT_STR_TUPLE("!12!") },
8176 /* [13] = */ { RT_STR_TUPLE("!13!") },
8177 /* [14] = */ { RT_STR_TUPLE("!14!") },
8178 /* [15] = */ { RT_STR_TUPLE("!15!") },
8179 /* [16] = */ { RT_STR_TUPLE("!16!") },
8180 /* [17] = */ { RT_STR_TUPLE("!17!") },
8181 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
8182 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
8183 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
8184 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
8185 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
8186 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
8187 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
8188 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
8189 };
8190 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
8191 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
8192 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
8193
8194 pszBuf[off++] = ' ';
8195 pszBuf[off++] = 'C';
8196 pszBuf[off++] = 'P';
8197 pszBuf[off++] = 'L';
8198 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
8199 Assert(off < 32);
8200
8201 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
8202
8203 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
8204 {
8205 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
8206 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
8207 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
8208 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
8209 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
8210 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
8211 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
8212 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
8213 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
8214 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
8215 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
8216 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
8217 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
8218 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
8219 };
8220 if (fFlags)
8221 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
8222 if (s_aFlags[i].fFlag & fFlags)
8223 {
8224 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
8225 pszBuf[off++] = ' ';
8226 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
8227 off += s_aFlags[i].cchName;
8228 fFlags &= ~s_aFlags[i].fFlag;
8229 if (!fFlags)
8230 break;
8231 }
8232 pszBuf[off] = '\0';
8233
8234 return pszBuf;
8235}
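
/* Example (illustrative): a TB with the 64BIT mode value, CPL 0 and IEMTB_F_TYPE_NATIVE
   set would be formatted as "64BIT CPL0 TYPE_NATIVE". */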
8236
8237
8238DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8239{
8240 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8241
8242 char szDisBuf[512];
8243 DISSTATE Dis;
8244 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8245 uint32_t const cNative = pTb->Native.cInstructions;
8246 uint32_t offNative = 0;
8247#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8248 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8249#endif
8250 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8251 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8252 : DISCPUMODE_64BIT;
8253#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8254 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8255#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8256 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8257#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8258# error "Port me"
8259#else
8260 csh hDisasm = ~(size_t)0;
8261# if defined(RT_ARCH_AMD64)
8262 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8263# elif defined(RT_ARCH_ARM64)
8264 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8265# else
8266# error "Port me"
8267# endif
8268 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8269#endif
8270
8271 /*
8272 * Print TB info.
8273 */
8274 pHlp->pfnPrintf(pHlp,
8275 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8276 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8277 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8278 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8279#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8280 if (pDbgInfo && pDbgInfo->cEntries > 1)
8281 {
8282 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8283
8284 /*
8285              * This disassembly is driven by the debug info, which follows the native
8286              * code and indicates where the next guest instruction starts, where the
8287              * labels are, and other such things.
8288 */
8289 uint32_t idxThreadedCall = 0;
8290 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8291 uint8_t idxRange = UINT8_MAX;
8292 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8293 uint32_t offRange = 0;
8294 uint32_t offOpcodes = 0;
8295 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8296 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8297 uint32_t iDbgEntry = 1;
8298 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8299
8300 while (offNative < cNative)
8301 {
8302 /* If we're at or have passed the point where the next chunk of debug
8303 info starts, process it. */
8304 if (offDbgNativeNext <= offNative)
8305 {
8306 offDbgNativeNext = UINT32_MAX;
8307 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8308 {
8309 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8310 {
8311 case kIemTbDbgEntryType_GuestInstruction:
8312 {
8313 /* Did the exec flag change? */
8314 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8315 {
8316 pHlp->pfnPrintf(pHlp,
8317 " fExec change %#08x -> %#08x %s\n",
8318 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8319 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8320 szDisBuf, sizeof(szDisBuf)));
8321 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8322 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8323 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8324 : DISCPUMODE_64BIT;
8325 }
8326
8327                     /* New opcode range? We need to fend off a spurious debug info entry here for cases
8328 where the compilation was aborted before the opcode was recorded and the actual
8329 instruction was translated to a threaded call. This may happen when we run out
8330 of ranges, or when some complicated interrupts/FFs are found to be pending or
8331 similar. So, we just deal with it here rather than in the compiler code as it
8332 is a lot simpler to do up here. */
8333 if ( idxRange == UINT8_MAX
8334 || idxRange >= cRanges
8335 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8336 {
8337 idxRange += 1;
8338 if (idxRange < cRanges)
8339 offRange = 0;
8340 else
8341 continue;
8342 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
8343 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8344 + (pTb->aRanges[idxRange].idxPhysPage == 0
8345 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8346 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8347 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8348 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8349 pTb->aRanges[idxRange].idxPhysPage);
8350 }
8351
8352 /* Disassemble the instruction. */
8353 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8354 uint32_t cbInstr = 1;
8355 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8356 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8357 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8358 if (RT_SUCCESS(rc))
8359 {
8360 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8361 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8362 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8363 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8364
8365 static unsigned const s_offMarker = 55;
8366 static char const s_szMarker[] = " ; <--- guest";
8367 if (cch < s_offMarker)
8368 {
8369 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8370 cch = s_offMarker;
8371 }
8372 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8373 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8374
8375 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8376 }
8377 else
8378 {
8379 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8380 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8381 cbInstr = 1;
8382 }
8383 GCPhysPc += cbInstr;
8384 offOpcodes += cbInstr;
8385 offRange += cbInstr;
8386 continue;
8387 }
8388
8389 case kIemTbDbgEntryType_ThreadedCall:
8390 pHlp->pfnPrintf(pHlp,
8391 " Call #%u to %s (%u args)%s\n",
8392 idxThreadedCall,
8393 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8394 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8395 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
8396 idxThreadedCall++;
8397 continue;
8398
8399 case kIemTbDbgEntryType_GuestRegShadowing:
8400 {
8401 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8402 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8403 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8404 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8405 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8406 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8407 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8408 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8409 else
8410 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8411 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8412 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8413 continue;
8414 }
8415
8416 case kIemTbDbgEntryType_Label:
8417 {
8418 const char *pszName = "what_the_fudge";
8419 const char *pszComment = "";
8420 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8421 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8422 {
8423 case kIemNativeLabelType_Return:
8424 pszName = "Return";
8425 break;
8426 case kIemNativeLabelType_ReturnBreak:
8427 pszName = "ReturnBreak";
8428 break;
8429 case kIemNativeLabelType_ReturnWithFlags:
8430 pszName = "ReturnWithFlags";
8431 break;
8432 case kIemNativeLabelType_NonZeroRetOrPassUp:
8433 pszName = "NonZeroRetOrPassUp";
8434 break;
8435 case kIemNativeLabelType_RaiseGp0:
8436 pszName = "RaiseGp0";
8437 break;
8438 case kIemNativeLabelType_If:
8439 pszName = "If";
8440 fNumbered = true;
8441 break;
8442 case kIemNativeLabelType_Else:
8443 pszName = "Else";
8444 fNumbered = true;
8445 pszComment = " ; regs state restored pre-if-block";
8446 break;
8447 case kIemNativeLabelType_Endif:
8448 pszName = "Endif";
8449 fNumbered = true;
8450 break;
8451 case kIemNativeLabelType_CheckIrq:
8452 pszName = "CheckIrq_CheckVM";
8453 fNumbered = true;
8454 break;
8455 case kIemNativeLabelType_TlbMiss:
8456 pszName = "CheckIrq_TlbMiss";
8457 fNumbered = true;
8458 break;
8459 case kIemNativeLabelType_TlbDone:
8460 pszName = "CheckIrq_TlbDone";
8461 fNumbered = true;
8462 break;
8463 case kIemNativeLabelType_Invalid:
8464 case kIemNativeLabelType_End:
8465 break;
8466 }
8467 if (fNumbered)
8468 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8469 else
8470 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8471 continue;
8472 }
8473
8474 case kIemTbDbgEntryType_NativeOffset:
8475 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8476 Assert(offDbgNativeNext > offNative);
8477 break;
8478
8479 default:
8480 AssertFailed();
8481 }
8482 iDbgEntry++;
8483 break;
8484 }
8485 }
8486
8487 /*
8488 * Disassemble the next native instruction.
8489 */
8490 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8491# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8492 uint32_t cbInstr = sizeof(paNative[0]);
8493 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8494 if (RT_SUCCESS(rc))
8495 {
8496# if defined(RT_ARCH_AMD64)
8497 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8498 {
8499 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8500 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8501 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
8502 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8503 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8504 uInfo & 0x8000 ? " - recompiled" : "");
8505 else
8506 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8507 }
8508 else
8509# endif
8510 {
8511# ifdef RT_ARCH_AMD64
8512 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8513 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8514 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8515 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8516# elif defined(RT_ARCH_ARM64)
8517 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8518 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8519 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8520# else
8521# error "Port me"
8522# endif
8523 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8524 }
8525 }
8526 else
8527 {
8528# if defined(RT_ARCH_AMD64)
8529 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8530 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8531# elif defined(RT_ARCH_ARM64)
8532 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8533# else
8534# error "Port me"
8535# endif
8536 cbInstr = sizeof(paNative[0]);
8537 }
8538 offNative += cbInstr / sizeof(paNative[0]);
8539
8540# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8541 cs_insn *pInstr;
8542 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8543 (uintptr_t)pNativeCur, 1, &pInstr);
8544 if (cInstrs > 0)
8545 {
8546 Assert(cInstrs == 1);
8547# if defined(RT_ARCH_AMD64)
8548 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8549 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8550# else
8551 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8552 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8553# endif
8554 offNative += pInstr->size / sizeof(*pNativeCur);
8555 cs_free(pInstr, cInstrs);
8556 }
8557 else
8558 {
8559# if defined(RT_ARCH_AMD64)
8560 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8561                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8562# else
8563 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8564# endif
8565 offNative++;
8566 }
8567# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8568 }
8569 }
8570 else
8571#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8572 {
8573 /*
8574 * No debug info, just disassemble the x86 code and then the native code.
8575 *
8576 * First the guest code:
8577 */
8578 for (unsigned i = 0; i < pTb->cRanges; i++)
8579 {
8580 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8581 + (pTb->aRanges[i].idxPhysPage == 0
8582 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8583 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8584 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8585 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8586 unsigned off = pTb->aRanges[i].offOpcodes;
8587 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8588 while (off < cbOpcodes)
8589 {
8590 uint32_t cbInstr = 1;
8591 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8592 &pTb->pabOpcodes[off], cbOpcodes - off,
8593 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8594 if (RT_SUCCESS(rc))
8595 {
8596 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8597 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8598 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8599 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8600 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8601 GCPhysPc += cbInstr;
8602 off += cbInstr;
8603 }
8604 else
8605 {
8606 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8607 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8608 break;
8609 }
8610 }
8611 }
8612
8613 /*
8614 * Then the native code:
8615 */
8616 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8617 while (offNative < cNative)
8618 {
8619 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8620# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8621 uint32_t cbInstr = sizeof(paNative[0]);
8622 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8623 if (RT_SUCCESS(rc))
8624 {
8625# if defined(RT_ARCH_AMD64)
8626 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8627 {
8628 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8629 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8630 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
8631 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8632 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8633 uInfo & 0x8000 ? " - recompiled" : "");
8634 else
8635 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8636 }
8637 else
8638# endif
8639 {
8640# ifdef RT_ARCH_AMD64
8641 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8642 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8643 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8644 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8645# elif defined(RT_ARCH_ARM64)
8646 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8647 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8648 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8649# else
8650# error "Port me"
8651# endif
8652 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8653 }
8654 }
8655 else
8656 {
8657# if defined(RT_ARCH_AMD64)
8658 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8659 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8660# else
8661 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8662# endif
8663 cbInstr = sizeof(paNative[0]);
8664 }
8665 offNative += cbInstr / sizeof(paNative[0]);
8666
8667# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8668 cs_insn *pInstr;
8669 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8670 (uintptr_t)pNativeCur, 1, &pInstr);
8671 if (cInstrs > 0)
8672 {
8673 Assert(cInstrs == 1);
8674# if defined(RT_ARCH_AMD64)
8675 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8676 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8677# else
8678 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8679 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8680# endif
8681 offNative += pInstr->size / sizeof(*pNativeCur);
8682 cs_free(pInstr, cInstrs);
8683 }
8684 else
8685 {
8686# if defined(RT_ARCH_AMD64)
8687 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8688                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8689# else
8690 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8691# endif
8692 offNative++;
8693 }
8694# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8695 }
8696 }
8697
8698#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8699 /* Cleanup. */
8700 cs_close(&hDisasm);
8701#endif
8702}
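
/* Note: iemNativeRecompile below feeds this function DBGFR3InfoLogHlp() when level 3
   logging is enabled, which is what produces the post-recompile disassembly in the log. */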
8703
8704
8705/**
8706 * Recompiles the given threaded TB into a native one.
8707 *
8708 * In case of failure the translation block will be returned as-is.
8709 *
8710 * @returns pTb.
8711 * @param pVCpu The cross context virtual CPU structure of the calling
8712 * thread.
8713 * @param   pTb     The threaded translation block to recompile to native.
8714 */
8715DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
8716{
8717 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
8718
8719 /*
8720     * The first time through, we allocate the recompiler state; the other times
8721     * we just need to reset it before using it again.
8722 */
8723 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
8724 if (RT_LIKELY(pReNative))
8725 iemNativeReInit(pReNative, pTb);
8726 else
8727 {
8728 pReNative = iemNativeInit(pVCpu, pTb);
8729 AssertReturn(pReNative, pTb);
8730 }
8731
8732 /*
8733     * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
8734     * so that we can abort if an error happens.
8735 */
8736 uint32_t cCallsLeft = pTb->Thrd.cCalls;
8737#ifdef LOG_ENABLED
8738 uint32_t const cCallsOrg = cCallsLeft;
8739#endif
8740 uint32_t off = 0;
8741 int rc = VINF_SUCCESS;
8742 IEMNATIVE_TRY_SETJMP(pReNative, rc)
8743 {
8744 /*
8745 * Emit prolog code (fixed).
8746 */
8747 off = iemNativeEmitProlog(pReNative, off);
8748
8749 /*
8750 * Convert the calls to native code.
8751 */
8752#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8753 int32_t iGstInstr = -1;
8754#endif
8755#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
8756 uint32_t cThreadedCalls = 0;
8757 uint32_t cRecompiledCalls = 0;
8758#endif
8759 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
8760 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
8761 while (cCallsLeft-- > 0)
8762 {
8763 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
8764
8765 /*
8766 * Debug info and assembly markup.
8767 */
8768 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
8769 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
8770#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8771 iemNativeDbgInfoAddNativeOffset(pReNative, off);
8772 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
8773 {
8774 if (iGstInstr < (int32_t)pTb->cInstructions)
8775 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
8776 else
8777 Assert(iGstInstr == pTb->cInstructions);
8778 iGstInstr = pCallEntry->idxInstr;
8779 }
8780 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
8781#endif
8782#if defined(VBOX_STRICT)
8783 off = iemNativeEmitMarker(pReNative, off,
8784 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
8785 pCallEntry->enmFunction));
8786#endif
8787#if defined(VBOX_STRICT)
8788 iemNativeRegAssertSanity(pReNative);
8789#endif
8790
8791 /*
8792 * Actual work.
8793 */
8794 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
8795 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "" : "(todo)"));
8796 if (pfnRecom) /** @todo stats on this. */
8797 {
8798 off = pfnRecom(pReNative, off, pCallEntry);
8799 STAM_REL_STATS({cRecompiledCalls++;});
8800 }
8801 else
8802 {
8803 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
8804 STAM_REL_STATS({cThreadedCalls++;});
8805 }
8806 Assert(off <= pReNative->cInstrBufAlloc);
8807 Assert(pReNative->cCondDepth == 0);
8808
8809 /*
8810 * Advance.
8811 */
8812 pCallEntry++;
8813 }
8814
8815 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
8816 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
8817 if (!cThreadedCalls)
8818 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
8819
8820 /*
8821 * Emit the epilog code.
8822 */
8823 uint32_t idxReturnLabel;
8824 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
8825
8826 /*
8827 * Generate special jump labels.
8828 */
8829 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
8830 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
8831 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
8832 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
8833 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
8834 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
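        /* Note: only the label types actually requested during recompilation (tracked
           in pReNative->bmLabelTypes) get their tail code emitted here, so unused
           return paths do not take up space in the translation block. */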
8835 }
8836 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
8837 {
8838 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
8839 return pTb;
8840 }
8841 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
8842 Assert(off <= pReNative->cInstrBufAlloc);
8843
8844 /*
8845     * Make sure all labels have been defined.
8846 */
8847 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
8848#ifdef VBOX_STRICT
8849 uint32_t const cLabels = pReNative->cLabels;
8850 for (uint32_t i = 0; i < cLabels; i++)
8851 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
8852#endif
8853
8854 /*
8855 * Allocate executable memory, copy over the code we've generated.
8856 */
8857 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
8858 if (pTbAllocator->pDelayedFreeHead)
8859 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
8860
8861 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
8862 AssertReturn(paFinalInstrBuf, pTb);
8863 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
8864
8865 /*
8866 * Apply fixups.
8867 */
8868 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
8869 uint32_t const cFixups = pReNative->cFixups;
8870 for (uint32_t i = 0; i < cFixups; i++)
8871 {
8872 Assert(paFixups[i].off < off);
8873 Assert(paFixups[i].idxLabel < cLabels);
8874 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
8875 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
8876 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
8877 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
8878 switch (paFixups[i].enmType)
8879 {
8880#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
8881 case kIemNativeFixupType_Rel32:
8882 Assert(paFixups[i].off + 4 <= off);
8883 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8884 continue;
8885
8886#elif defined(RT_ARCH_ARM64)
8887 case kIemNativeFixupType_RelImm26At0:
8888 {
8889 Assert(paFixups[i].off < off);
8890 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8891 Assert(offDisp >= -262144 && offDisp < 262144);
8892 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
8893 continue;
8894 }
8895
8896 case kIemNativeFixupType_RelImm19At5:
8897 {
8898 Assert(paFixups[i].off < off);
8899 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8900 Assert(offDisp >= -262144 && offDisp < 262144);
8901 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
8902 continue;
8903 }
8904
8905 case kIemNativeFixupType_RelImm14At5:
8906 {
8907 Assert(paFixups[i].off < off);
8908 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8909 Assert(offDisp >= -8192 && offDisp < 8192);
8910 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
8911 continue;
8912 }
8913
8914#endif
8915 case kIemNativeFixupType_Invalid:
8916 case kIemNativeFixupType_End:
8917 break;
8918 }
8919 AssertFailed();
8920 }
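    /* Fixup example (illustrative numbers): an ARM64 branch recorded with
       kIemNativeFixupType_RelImm26At0 at instruction index 90, targeting a label
       defined at index 100 with a zero addend, yields offDisp = 100 - 90 = 10, so
       the low 26 bits of the instruction word are patched to 0x00000a while the
       opcode bits (the 0xfc000000 mask) are preserved. */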
8921
8922 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
8923 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
8924
8925 /*
8926 * Convert the translation block.
8927 */
8928 RTMemFree(pTb->Thrd.paCalls);
8929 pTb->Native.paInstructions = paFinalInstrBuf;
8930 pTb->Native.cInstructions = off;
8931 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
8932#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8933     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
8934 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
8935#endif
8936
8937 Assert(pTbAllocator->cThreadedTbs > 0);
8938 pTbAllocator->cThreadedTbs -= 1;
8939 pTbAllocator->cNativeTbs += 1;
8940 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
8941
8942#ifdef LOG_ENABLED
8943 /*
8944 * Disassemble to the log if enabled.
8945 */
8946 if (LogIs3Enabled())
8947 {
8948 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
8949 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
8950# ifdef DEBUG_bird
8951 RTLogFlush(NULL);
8952# endif
8953 }
8954#endif
8955
8956 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
8957 return pTb;
8958}
8959