VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 102313

Last change on this file since 102313 was 102313, checked in by vboxsync, 14 months ago

VMM/IEM: First recompiled memory access. This is without any TLB usage, so it'll just make a call to fetch the memory. Fixed bug handling referenced variables. Optimized label lookup. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 373.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 102313 2023-11-27 13:01:13Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128
129/*********************************************************************************************************************************
130* Defined Constants And Macros *
131*********************************************************************************************************************************/
132/** Always count instructions for now. */
133#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
134
135
136/*********************************************************************************************************************************
137* Internal Functions *
138*********************************************************************************************************************************/
139#ifdef VBOX_STRICT
140static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
141 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
142static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
143#endif
144#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
145static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
146static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
147#endif
148DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
149DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
150 IEMNATIVEGSTREG enmGstReg, uint32_t off);
151
152
153/*********************************************************************************************************************************
154* Executable Memory Allocator *
155*********************************************************************************************************************************/
156/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157 * Use an alternative chunk sub-allocator that does not store internal data
158 * in the chunk.
159 *
160 * Using the RTHeapSimple is not practical on newer darwin systems where
161 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
162 * memory. We would have to change the protection of the whole chunk for
163 * every call to RTHeapSimple, which would be rather expensive.
164 *
165 * This alternative implementation lets us restrict page protection modifications
166 * to the pages backing the executable memory we just allocated.
167 */
168#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
169/** The chunk sub-allocation unit size in bytes. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
171/** The chunk sub-allocation unit size as a shift factor. */
172#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
173
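/*
 * Quick sanity check of the unit math above (illustration only): an allocation
 * request is rounded up to whole 128 byte units, i.e. cReqUnits = (cbReq + 127) >> 7,
 * so cbReq = 1..128 gives one unit and cbReq = 129 gives two.  With the default
 * 64 MiB chunk size (see iemExecMemAllocatorInit below) that makes
 * 64M >> 7 = 524288 units per chunk, tracked by 524288 / 64 = 8192 uint64_t
 * bitmap words, i.e. 64 KiB of allocation bitmap per chunk.
 */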
174#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
175# ifdef IEMNATIVE_USE_GDB_JIT
176# define IEMNATIVE_USE_GDB_JIT_ET_DYN
177
178/** GDB JIT: Code entry. */
179typedef struct GDBJITCODEENTRY
180{
181 struct GDBJITCODEENTRY *pNext;
182 struct GDBJITCODEENTRY *pPrev;
183 uint8_t *pbSymFile;
184 uint64_t cbSymFile;
185} GDBJITCODEENTRY;
186
187/** GDB JIT: Actions. */
188typedef enum GDBJITACTIONS : uint32_t
189{
190 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
191} GDBJITACTIONS;
192
193/** GDB JIT: Descriptor. */
194typedef struct GDBJITDESCRIPTOR
195{
196 uint32_t uVersion;
197 GDBJITACTIONS enmAction;
198 GDBJITCODEENTRY *pRelevant;
199 GDBJITCODEENTRY *pHead;
200 /** Our addition: */
201 GDBJITCODEENTRY *pTail;
202} GDBJITDESCRIPTOR;
203
204/** GDB JIT: Our simple symbol file data. */
205typedef struct GDBJITSYMFILE
206{
207 Elf64_Ehdr EHdr;
208# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
209 Elf64_Shdr aShdrs[5];
210# else
211 Elf64_Shdr aShdrs[7];
212 Elf64_Phdr aPhdrs[2];
213# endif
214 /** The dwarf ehframe data for the chunk. */
215 uint8_t abEhFrame[512];
216 char szzStrTab[128];
217 Elf64_Sym aSymbols[3];
218# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
219 Elf64_Sym aDynSyms[2];
220 Elf64_Dyn aDyn[6];
221# endif
222} GDBJITSYMFILE;
223
224extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
225extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
226
227/** Init once for g_IemNativeGdbJitLock. */
228static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
229/** Init once for the critical section. */
230static RTCRITSECT g_IemNativeGdbJitLock;
231
232/** GDB reads the info here. */
233GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
234
235/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
236DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
237{
238 ASMNopPause();
239}
240
241/** @callback_method_impl{FNRTONCE} */
242static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
243{
244 RT_NOREF(pvUser);
245 return RTCritSectInit(&g_IemNativeGdbJitLock);
246}
247
248
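/*
 * Rough sketch of how the GDB JIT interface above is driven (illustration only;
 * the actual registration, including taking g_IemNativeGdbJitLock, is done in
 * iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk further down): a new
 * in-memory symbol file is linked into the descriptor list and the well-known
 * hook function is poked so an attached GDB can pick it up.
 */
#  if 0 /* illustrative sketch, not used */
static void iemNativeGdbJitPublishExample(GDBJITCODEENTRY *pEntry) /* hypothetical helper */
{
    pEntry->pNext = NULL;
    pEntry->pPrev = __jit_debug_descriptor.pTail;
    if (__jit_debug_descriptor.pTail)
        __jit_debug_descriptor.pTail->pNext = pEntry;
    else
        __jit_debug_descriptor.pHead = pEntry;
    __jit_debug_descriptor.pTail     = pEntry;
    __jit_debug_descriptor.pRelevant = pEntry;
    __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    __jit_debug_register_code();                /* GDB has a breakpoint here. */
    __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
}
#  endif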
249# endif /* IEMNATIVE_USE_GDB_JIT */
250
251/**
252 * Per-chunk unwind info for non-windows hosts.
253 */
254typedef struct IEMEXECMEMCHUNKEHFRAME
255{
256# ifdef IEMNATIVE_USE_LIBUNWIND
257 /** The offset of the FDA into abEhFrame. */
258 uintptr_t offFda;
259# else
260 /** 'struct object' storage area. */
261 uint8_t abObject[1024];
262# endif
263# ifdef IEMNATIVE_USE_GDB_JIT
264# if 0
265 /** The GDB JIT 'symbol file' data. */
266 GDBJITSYMFILE GdbJitSymFile;
267# endif
268 /** The GDB JIT list entry. */
269 GDBJITCODEENTRY GdbJitEntry;
270# endif
271 /** The dwarf ehframe data for the chunk. */
272 uint8_t abEhFrame[512];
273} IEMEXECMEMCHUNKEHFRAME;
274/** Pointer to per-chunk info for non-windows hosts. */
275typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
276#endif
277
278
279/**
280 * A chunk of executable memory.
281 */
282typedef struct IEMEXECMEMCHUNK
283{
284#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
285 /** Number of free items in this chunk. */
286 uint32_t cFreeUnits;
287 /** Hint where to start searching for free space in the allocation bitmap. */
288 uint32_t idxFreeHint;
289#else
290 /** The heap handle. */
291 RTHEAPSIMPLE hHeap;
292#endif
293 /** Pointer to the chunk. */
294 void *pvChunk;
295#ifdef IN_RING3
296 /**
297 * Pointer to the unwind information.
298 *
299 * This is used during C++ throw and longjmp (windows and probably most other
300 * platforms). Some debuggers (windbg) make use of it as well.
301 *
302 * Windows: This is allocated from hHeap on windows because (at least for
303 * AMD64) the UNWIND_INFO structure address in the
304 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
305 *
306 * Others: Allocated from the regular heap to avoid unnecessary executable data
307 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
308 void *pvUnwindInfo;
309#elif defined(IN_RING0)
310 /** Allocation handle. */
311 RTR0MEMOBJ hMemObj;
312#endif
313} IEMEXECMEMCHUNK;
314/** Pointer to a memory chunk. */
315typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
316
317
318/**
319 * Executable memory allocator for the native recompiler.
320 */
321typedef struct IEMEXECMEMALLOCATOR
322{
323 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
324 uint32_t uMagic;
325
326 /** The chunk size. */
327 uint32_t cbChunk;
328 /** The maximum number of chunks. */
329 uint32_t cMaxChunks;
330 /** The current number of chunks. */
331 uint32_t cChunks;
332 /** Hint where to start looking for available memory. */
333 uint32_t idxChunkHint;
334 /** Statistics: Current number of allocations. */
335 uint32_t cAllocations;
336
337 /** The total amount of memory available. */
338 uint64_t cbTotal;
339 /** Total amount of free memory. */
340 uint64_t cbFree;
341 /** Total amount of memory allocated. */
342 uint64_t cbAllocated;
343
344#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
345 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
346 *
347 * Since the chunk size is a power of two and the minimum chunk size is a lot
348 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
349 * require a whole number of uint64_t elements in the allocation bitmap. So,
350 * for the sake of simplicity/laziness, they are allocated as one continuous
351 * chunk. */
352 uint64_t *pbmAlloc;
353 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
354 uint32_t cUnitsPerChunk;
355 /** Number of bitmap elements per chunk (for quickly locating the bitmap
356 * portion corresponding to a chunk). */
357 uint32_t cBitmapElementsPerChunk;
358#else
359 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
360 * @{ */
361 /** The size of the heap internal block header. This is used to adjust the
362 * request memory size to make sure there is exactly enough room for a header at
363 * the end of the blocks we allocate before the next 64 byte alignment line. */
364 uint32_t cbHeapBlockHdr;
365 /** The size of the initial heap allocation required to make sure the first
366 * allocation is correctly aligned. */
367 uint32_t cbHeapAlignTweak;
368 /** The alignment tweak allocation address. */
369 void *pvAlignTweak;
370 /** @} */
371#endif
372
373#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
374 /** Pointer to the array of unwind info running parallel to aChunks (same
375 * allocation as this structure, located after the bitmaps).
376 * (For Windows, the structures must reside in 32-bit RVA distance to the
377 * actual chunk, so they are allocated off the chunk.) */
378 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
379#endif
380
381 /** The allocation chunks. */
382 RT_FLEXIBLE_ARRAY_EXTENSION
383 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
384} IEMEXECMEMALLOCATOR;
385/** Pointer to an executable memory allocator. */
386typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
387
388/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
389#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
390
391
392static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
393
394
395/**
396 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
397 * the heap statistics.
398 */
399static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
400 uint32_t cbReq, uint32_t idxChunk)
401{
402 pExecMemAllocator->cAllocations += 1;
403 pExecMemAllocator->cbAllocated += cbReq;
404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
405 pExecMemAllocator->cbFree -= cbReq;
406#else
407 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
408#endif
409 pExecMemAllocator->idxChunkHint = idxChunk;
410
411#ifdef RT_OS_DARWIN
412 /*
413 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
414 * on darwin. So, we mark the pages returned as read+write after alloc and
415 * expect the caller to call iemExecMemAllocatorReadyForUse when done
416 * writing to the allocation.
417 *
418 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
419 * for details.
420 */
421 /** @todo detect if this is necessary... it wasn't required on 10.15 or
422 * whatever older version it was. */
423 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
424 AssertRC(rc);
425#endif
426
427 return pvRet;
428}
429
430
431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
432static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
433 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
434{
435 /*
436 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
437 */
438 Assert(!(cToScan & 63));
439 Assert(!(idxFirst & 63));
440 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
441 pbmAlloc += idxFirst / 64;
442
443 /*
444 * Scan the bitmap for cReqUnits consecutive clear bits
445 */
446 /** @todo This can probably be done more efficiently for non-x86 systems. */
447 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
448 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
449 {
450 uint32_t idxAddBit = 1;
451 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
452 idxAddBit++;
453 if (idxAddBit >= cReqUnits)
454 {
455 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
456
457 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
458 pChunk->cFreeUnits -= cReqUnits;
459 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
460
461 void * const pvRet = (uint8_t *)pChunk->pvChunk
462 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
463
464 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
465 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
466 }
467
468 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
469 }
470 return NULL;
471}
472#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
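/*
 * The same first-fit idea spelled out without the IPRT ASMBit* helpers, in case
 * the intent of the loop above isn't obvious (illustration only; the function
 * name is made up and this is not used by the allocator):
 */
#if 0
static int32_t iemExecMemExampleFindClearRun(uint64_t const *pbmAlloc, uint32_t cBits, uint32_t cReqUnits)
{
    uint32_t cRun = 0;
    for (uint32_t iBit = 0; iBit < cBits; iBit++)
    {
        if (pbmAlloc[iBit / 64] & RT_BIT_64(iBit % 64))
            cRun = 0;                                   /* unit is taken, restart the run */
        else if (++cRun >= cReqUnits)
            return (int32_t)(iBit - cReqUnits + 1);     /* index of the first unit in the free run */
    }
    return -1;                                          /* no sufficiently large free run in this chunk */
}
#endif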
473
474
475static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
476{
477#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
478 /*
479 * Figure out how much to allocate.
480 */
481 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
482 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
483 {
484 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
485 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
486 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
487 {
488 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
489 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
490 if (pvRet)
491 return pvRet;
492 }
493 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
494 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
495 cReqUnits, idxChunk);
496 }
497#else
498 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
499 if (pvRet)
500 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
501#endif
502 return NULL;
503
504}
505
506
507/**
508 * Allocates @a cbReq bytes of executable memory.
509 *
510 * @returns Pointer to the memory, NULL if out of memory or other problem
511 * encountered.
512 * @param pVCpu The cross context virtual CPU structure of the calling
513 * thread.
514 * @param cbReq How many bytes are required.
515 */
516static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
517{
518 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
519 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
520 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
521
522 /*
523 * Adjust the request size so it'll fit the allocator alignment/whatnot.
524 *
525 * For the RTHeapSimple allocator this means to follow the logic described
526 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
527 * existing chunks if we think we've got sufficient free memory around.
528 *
529 * While for the alternative one we just align it up to a whole unit size.
530 */
531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
532 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
533#else
534 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
535#endif
536 if (cbReq <= pExecMemAllocator->cbFree)
537 {
538 uint32_t const cChunks = pExecMemAllocator->cChunks;
539 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
540 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
541 {
542 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
543 if (pvRet)
544 return pvRet;
545 }
546 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
547 {
548 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
549 if (pvRet)
550 return pvRet;
551 }
552 }
553
554 /*
555 * Can we grow it with another chunk?
556 */
557 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
558 {
559 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
560 AssertLogRelRCReturn(rc, NULL);
561
562 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
563 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
564 if (pvRet)
565 return pvRet;
566 AssertFailed();
567 }
568
569 /* What now? Prune native translation blocks from the cache? */
570 AssertFailed();
571 return NULL;
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
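/*
 * Putting the three calls above together - a rough usage sketch only, since the
 * real caller is the native recompiler further down; pbCode and cbCode are made
 * up for the illustration:
 */
#if 0
    uint8_t * const pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pbDst)
    {
        memcpy(pbDst, pbCode, cbCode);                        /* emit/copy the native code; the memory is read+write here */
        iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cbCode); /* darwin: flip to read+exec and flush the icache */
        /* ... execute the translation block ... */
        iemExecMemAllocatorFree(pVCpu, pbDst, cbCode);        /* return the units to the chunk bitmap */
    }
#endif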
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
812
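/*
 * Worked examples for the two encoders above (illustration only): the unsigned
 * variant encodes 300 (0x12c) as 0xac 0x02 - low seven bits first with the high
 * bit set on every byte but the last - while the signed variant encodes the -8
 * data alignment factor used below as the single byte 0x78 (0x38 | 0x40).
 */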
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
842
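/*
 * Worked example (illustration only): iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2)
 * emits the two bytes 0x86 0x02 - DW_CFA_offset (0x80) with register 6 (RBP)
 * folded into the low bits, followed by the ULEB128 factor 2 - meaning RBP is
 * saved at CFA + 2 * data_alignment_factor, i.e. CFA - 16 with the -8 factor
 * used in the CIE below.
 */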
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
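
/*
 * Note: a worked example of the chunk-size defaulting and rounding performed
 * above (values derived purely from the code in this function):
 * @code
 *      // cbMax = 10*_1M : cbChunk defaults to _4M, cbMax is rounded up to 12 MiB, cMaxChunks = 3.
 *      // cbMax = 32*_1M : cbChunk defaults to _8M (cbMax / 4, already a power of two), cMaxChunks = 4.
 *      // cbMax = 512*_1M: cbChunk defaults to _64M, cbMax stays 512 MiB, cMaxChunks = 8.
 * @endcode
 */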
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#GP(0).
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1588#ifndef _MSC_VER
1589 return VINF_IEM_RAISED_XCPT; /* not reached */
1590#endif
1591}
1592
1593
1594/* Segmented memory helpers: */
1595
1596/**
1597 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1598 */
1599IEM_DECL_NATIVE_HLP_DEF(uint8_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint8_t idxInstr))
1600{
1601 RT_NOREF(idxInstr); /** @todo idxInstr */
1602 return iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1603}
1604
1605
1606/**
1607 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1608 */
1609IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint8_t idxInstr))
1610{
1611 RT_NOREF(idxInstr); /** @todo idxInstr */
1612 return iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1613}
1614
1615
1616/**
1617 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1618 */
1619IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint8_t idxInstr))
1620{
1621 RT_NOREF(idxInstr); /** @todo idxInstr */
1622 return iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1623}
1624
1625
1626/**
1627 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1628 */
1629IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint8_t idxInstr))
1630{
1631 RT_NOREF(idxInstr); /** @todo idxInstr */
1632 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1633}
1634
1635
1636/* Flat memory helpers: */
1637
1638/**
1639 * Used by TB code to load unsigned 8-bit data w/ flat address (no segmentation).
1640 */
1641IEM_DECL_NATIVE_HLP_DEF(uint8_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t idxInstr))
1642{
1643 RT_NOREF(idxInstr); /** @todo idxInstr */
1644 return iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
1645}
1646
1647
1648/**
1649 * Used by TB code to load unsigned 16-bit data w/ flat address (no segmentation).
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t idxInstr))
1652{
1653 RT_NOREF(idxInstr); /** @todo idxInstr */
1654 return iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
1655}
1656
1657
1658/**
1659 * Used by TB code to load unsigned 32-bit data w/ flat address (no segmentation).
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t idxInstr))
1662{
1663 RT_NOREF(idxInstr); /** @todo idxInstr */
1664 return iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
1665}
1666
1667
1668/**
1669 * Used by TB code to load unsigned 64-bit data w/ flat address (no segmentation).
1670 */
1671IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t idxInstr))
1672{
1673 RT_NOREF(idxInstr); /** @todo idxInstr */
1674 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
1675}
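
/*
 * Note: at this stage the recompiled TB code simply calls the helpers above to
 * perform the memory access.  A minimal sketch of what such a call amounts to
 * in C terms (the variable names are illustrative only):
 * @code
 *      // Fetch a qword from the flat address space on behalf of the current TB instruction.
 *      uint64_t const uValue = iemNativeHlpMemFlatFetchDataU64(pVCpu, GCPtrMem, idxInstr);
 * @endcode
 */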
1676
1677
1678
1679/**
1680 * Reinitializes the native recompiler state.
1681 *
1682 * Called before starting a new recompile job.
1683 */
1684static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1685{
1686 pReNative->cLabels = 0;
1687 pReNative->bmLabelTypes = 0;
1688 pReNative->cFixups = 0;
1689#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1690 pReNative->pDbgInfo->cEntries = 0;
1691#endif
1692 pReNative->pTbOrg = pTb;
1693 pReNative->cCondDepth = 0;
1694 pReNative->uCondSeqNo = 0;
1695 pReNative->uCheckIrqSeqNo = 0;
1696 pReNative->uTlbSeqNo = 0;
1697
1698 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1699#if IEMNATIVE_HST_GREG_COUNT < 32
1700 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1701#endif
1702 ;
1703 pReNative->Core.bmHstRegsWithGstShadow = 0;
1704 pReNative->Core.bmGstRegShadows = 0;
1705 pReNative->Core.bmVars = 0;
1706 pReNative->Core.bmStack = 0;
1707 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1708 pReNative->Core.u64ArgVars = UINT64_MAX;
1709
1710 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 6);
1711 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1712 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1713 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1714 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1715 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1716 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1717
1718 /* Full host register reinit: */
1719 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1720 {
1721 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1722 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1723 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1724 }
1725
1726 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1727 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1728#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1729 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1730#endif
1731#ifdef IEMNATIVE_REG_FIXED_TMP0
1732 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1733#endif
1734 );
1735 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1736 {
1737 fRegs &= ~RT_BIT_32(idxReg);
1738 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1739 }
1740
1741 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1742#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1743 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1744#endif
1745#ifdef IEMNATIVE_REG_FIXED_TMP0
1746 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1747#endif
1748 return pReNative;
1749}
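
/*
 * Note: the Core.bmHstRegs initialization above marks both the fixed registers
 * and any non-existent register indexes as permanently taken.  A sketch of the
 * same idea for a hypothetical host with only 16 allocatable GPRs (the numbers
 * are illustrative, not taken from the real masks):
 * @code
 *      uint32_t bmHstRegs = fFixedRegMask            // pVCpu, frame pointer, fixed temporaries, ...
 *                         | ~(RT_BIT_32(16) - 1U);   // bits 16..31 can never be allocated
 * @endcode
 */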
1750
1751
1752/**
1753 * Allocates and initializes the native recompiler state.
1754 *
1755 * This is called the first time an EMT wants to recompile something.
1756 *
1757 * @returns Pointer to the new recompiler state.
1758 * @param pVCpu The cross context virtual CPU structure of the calling
1759 * thread.
1760 * @param pTb The TB that's about to be recompiled.
1761 * @thread EMT(pVCpu)
1762 */
1763static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1764{
1765 VMCPU_ASSERT_EMT(pVCpu);
1766
1767 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1768 AssertReturn(pReNative, NULL);
1769
1770 /*
1771 * Try to allocate all the buffers and stuff we need.
1772 */
1773 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1774 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1775 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1776#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1777 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1778#endif
1779 if (RT_LIKELY( pReNative->pInstrBuf
1780 && pReNative->paLabels
1781 && pReNative->paFixups)
1782#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1783 && pReNative->pDbgInfo
1784#endif
1785 )
1786 {
1787 /*
1788 * Set the buffer & array sizes on success.
1789 */
1790 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1791 pReNative->cLabelsAlloc = _8K;
1792 pReNative->cFixupsAlloc = _16K;
1793#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1794 pReNative->cDbgInfoAlloc = _16K;
1795#endif
1796
1797 /*
1798 * Done, just need to save it and reinit it.
1799 */
1800 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1801 return iemNativeReInit(pReNative, pTb);
1802 }
1803
1804 /*
1805 * Failed. Cleanup and return.
1806 */
1807 AssertFailed();
1808 RTMemFree(pReNative->pInstrBuf);
1809 RTMemFree(pReNative->paLabels);
1810 RTMemFree(pReNative->paFixups);
1811#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1812 RTMemFree(pReNative->pDbgInfo);
1813#endif
1814 RTMemFree(pReNative);
1815 return NULL;
1816}
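
/*
 * Note: iemNativeInit() stores the state in pVCpu->iem.s.pNativeRecompilerStateR3,
 * so the expected per-TB pattern is a lazy first-time init followed by cheap
 * reinits.  This is a sketch based on the two functions above; the actual
 * caller lives elsewhere in IEM:
 * @code
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      pReNative = pReNative ? iemNativeReInit(pReNative, pTb) : iemNativeInit(pVCpu, pTb);
 * @endcode
 */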
1817
1818
1819/**
1820 * Creates a label
1821 *
1822 * If the label does not yet have a defined position,
1823 * call iemNativeLabelDefine() later to set it.
1824 *
1825 * @returns Label ID. Throws VBox status code on failure, so no need to check
1826 * the return value.
1827 * @param pReNative The native recompile state.
1828 * @param enmType The label type.
1829 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1830 * label is not yet defined (default).
1831 * @param uData Data associated with the label. Only applicable to
1832 * certain types of labels. Default is zero.
1833 */
1834DECL_HIDDEN_THROW(uint32_t)
1835iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1836 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1837{
1838 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
1839
1840 /*
1841 * Locate existing label definition.
1842 *
1843 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1844 * and uData is zero.
1845 */
1846 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1847 uint32_t const cLabels = pReNative->cLabels;
1848 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1849#ifndef VBOX_STRICT
1850 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
1851 && offWhere == UINT32_MAX
1852 && uData == 0
1853#endif
1854 )
1855 {
1856#ifndef VBOX_STRICT
1857 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
1858 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1859 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
1860 if (idxLabel < pReNative->cLabels)
1861 return idxLabel;
1862#else
1863 for (uint32_t i = 0; i < cLabels; i++)
1864 if ( paLabels[i].enmType == enmType
1865 && paLabels[i].uData == uData)
1866 {
1867 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1868 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1869 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1870 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
1871 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1872 return i;
1873 }
1874 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
1875 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1876#endif
1877 }
1878
1879 /*
1880 * Make sure we've got room for another label.
1881 */
1882 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1883 { /* likely */ }
1884 else
1885 {
1886 uint32_t cNew = pReNative->cLabelsAlloc;
1887 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1888 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1889 cNew *= 2;
1890 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1891 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1892 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1893 pReNative->paLabels = paLabels;
1894 pReNative->cLabelsAlloc = cNew;
1895 }
1896
1897 /*
1898 * Define a new label.
1899 */
1900 paLabels[cLabels].off = offWhere;
1901 paLabels[cLabels].enmType = enmType;
1902 paLabels[cLabels].uData = uData;
1903 pReNative->cLabels = cLabels + 1;
1904
1905 Assert((unsigned)enmType < 64);
1906 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1907
1908 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
1909 {
1910 Assert(uData == 0);
1911 pReNative->aidxUniqueLabels[enmType] = cLabels;
1912 }
1913
1914 if (offWhere != UINT32_MAX)
1915 {
1916#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1917 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1918 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1919#endif
1920 }
1921 return cLabels;
1922}
1923
1924
1925/**
1926 * Defines the location of an existing label.
1927 *
1928 * @param pReNative The native recompile state.
1929 * @param idxLabel The label to define.
1930 * @param offWhere The position.
1931 */
1932DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1933{
1934 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1935 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1936 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1937 pLabel->off = offWhere;
1938#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1939 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1940 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1941#endif
1942}
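
/*
 * Note: the typical forward-reference pattern built from the two functions
 * above (a sketch; 'enmLabelType' stands for any suitable label type and 'off'
 * for the current instruction buffer position):
 * @code
 *      // Request the label without a position (offWhere defaults to UINT32_MAX)...
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      // ...emit code that branches to it, then pin it down once the target is reached.
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */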
1943
1944
1945/**
1946 * Looks up a label.
1947 *
1948 * @returns Label ID if found, UINT32_MAX if not.
1949 */
1950static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1951 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1952{
1953 Assert((unsigned)enmType < 64);
1954 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1955 {
1956 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
1957 return pReNative->aidxUniqueLabels[enmType];
1958
1959 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1960 uint32_t const cLabels = pReNative->cLabels;
1961 for (uint32_t i = 0; i < cLabels; i++)
1962 if ( paLabels[i].enmType == enmType
1963 && paLabels[i].uData == uData
1964 && ( paLabels[i].off == offWhere
1965 || offWhere == UINT32_MAX
1966 || paLabels[i].off == UINT32_MAX))
1967 return i;
1968 }
1969 return UINT32_MAX;
1970}
1971
1972
1973/**
1974 * Adds a fixup.
1975 *
1976 * @throws VBox status code (int) on failure.
1977 * @param pReNative The native recompile state.
1978 * @param offWhere The instruction offset of the fixup location.
1979 * @param idxLabel The target label ID for the fixup.
1980 * @param enmType The fixup type.
1981 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1982 */
1983DECL_HIDDEN_THROW(void)
1984iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1985 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1986{
1987 Assert(idxLabel <= UINT16_MAX);
1988 Assert((unsigned)enmType <= UINT8_MAX);
1989
1990 /*
1991 * Make sure we've room.
1992 */
1993 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1994 uint32_t const cFixups = pReNative->cFixups;
1995 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1996 { /* likely */ }
1997 else
1998 {
1999 uint32_t cNew = pReNative->cFixupsAlloc;
2000 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2001 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2002 cNew *= 2;
2003 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2004 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2005 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2006 pReNative->paFixups = paFixups;
2007 pReNative->cFixupsAlloc = cNew;
2008 }
2009
2010 /*
2011 * Add the fixup.
2012 */
2013 paFixups[cFixups].off = offWhere;
2014 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2015 paFixups[cFixups].enmType = enmType;
2016 paFixups[cFixups].offAddend = offAddend;
2017 pReNative->cFixups = cFixups + 1;
2018}
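
/*
 * Note: a fixup records the position of an instruction whose displacement
 * cannot be encoded yet because the target label is still undefined.  Minimal
 * sketch; the label and fixup type names below are placeholders, the real
 * values are target dependent:
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      uint32_t const offJmp   = off;   // position of the branch about to be emitted
 *      // ... emit the branch with a dummy displacement ...
 *      iemNativeAddFixup(pReNative, offJmp, idxLabel, enmFixupType);
 * @endcode
 */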
2019
2020
2021/**
2022 * Slow code path for iemNativeInstrBufEnsure.
2023 */
2024DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2025{
2026 /* Double the buffer size till we meet the request. */
2027 uint32_t cNew = pReNative->cInstrBufAlloc;
2028 AssertReturn(cNew > 0, NULL);
2029 do
2030 cNew *= 2;
2031 while (cNew < off + cInstrReq);
2032
2033 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2034#ifdef RT_ARCH_ARM64
2035 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2036#else
2037 uint32_t const cbMaxInstrBuf = _2M;
2038#endif
2039 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2040
2041 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2042 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2043
2044 pReNative->cInstrBufAlloc = cNew;
2045 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2046}
2047
2048#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2049
2050/**
2051 * Grows the static debug info array used during recompilation.
2052 *
2053 * @returns Pointer to the new debug info block; throws VBox status code on
2054 * failure, so no need to check the return value.
2055 */
2056DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2057{
2058 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2059 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2060 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2061 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2062 pReNative->pDbgInfo = pDbgInfo;
2063 pReNative->cDbgInfoAlloc = cNew;
2064 return pDbgInfo;
2065}
2066
2067
2068/**
2069 * Adds a new debug info uninitialized entry, returning the pointer to it.
2070 */
2071DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2072{
2073 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2074 { /* likely */ }
2075 else
2076 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2077 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2078}
2079
2080
2081/**
2082 * Debug Info: Adds a native offset record, if necessary.
2083 */
2084static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2085{
2086 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2087
2088 /*
2089 * Search backwards to see if we've got a similar record already.
2090 */
2091 uint32_t idx = pDbgInfo->cEntries;
2092 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
2093 while (idx-- > idxStop)
2094 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
2095 {
2096 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
2097 return;
2098 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
2099 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2100 break;
2101 }
2102
2103 /*
2104 * Add it.
2105 */
2106 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2107 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2108 pEntry->NativeOffset.offNative = off;
2109}
2110
2111
2112/**
2113 * Debug Info: Record info about a label.
2114 */
2115static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2116{
2117 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2118 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2119 pEntry->Label.uUnused = 0;
2120 pEntry->Label.enmLabel = (uint8_t)enmType;
2121 pEntry->Label.uData = uData;
2122}
2123
2124
2125/**
2126 * Debug Info: Record info about a threaded call.
2127 */
2128static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2129{
2130 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2131 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2132 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2133 pEntry->ThreadedCall.uUnused = 0;
2134 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2135}
2136
2137
2138/**
2139 * Debug Info: Record info about a new guest instruction.
2140 */
2141static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2142{
2143 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2144 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2145 pEntry->GuestInstruction.uUnused = 0;
2146 pEntry->GuestInstruction.fExec = fExec;
2147}
2148
2149
2150/**
2151 * Debug Info: Record info about guest register shadowing.
2152 */
2153static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2154 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2155{
2156 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2157 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2158 pEntry->GuestRegShadowing.uUnused = 0;
2159 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2160 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2161 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2162}
2163
2164#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2165
2166
2167/*********************************************************************************************************************************
2168* Register Allocator *
2169*********************************************************************************************************************************/
2170
2171/**
2172 * Register parameter indexes (indexed by argument number).
2173 */
2174DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2175{
2176 IEMNATIVE_CALL_ARG0_GREG,
2177 IEMNATIVE_CALL_ARG1_GREG,
2178 IEMNATIVE_CALL_ARG2_GREG,
2179 IEMNATIVE_CALL_ARG3_GREG,
2180#if defined(IEMNATIVE_CALL_ARG4_GREG)
2181 IEMNATIVE_CALL_ARG4_GREG,
2182# if defined(IEMNATIVE_CALL_ARG5_GREG)
2183 IEMNATIVE_CALL_ARG5_GREG,
2184# if defined(IEMNATIVE_CALL_ARG6_GREG)
2185 IEMNATIVE_CALL_ARG6_GREG,
2186# if defined(IEMNATIVE_CALL_ARG7_GREG)
2187 IEMNATIVE_CALL_ARG7_GREG,
2188# endif
2189# endif
2190# endif
2191#endif
2192};
2193
2194/**
2195 * Call register masks indexed by argument count.
2196 */
2197DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2198{
2199 0,
2200 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2201 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2202 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2203 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2204 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2205#if defined(IEMNATIVE_CALL_ARG4_GREG)
2206 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2207 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2208# if defined(IEMNATIVE_CALL_ARG5_GREG)
2209 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2210 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2211# if defined(IEMNATIVE_CALL_ARG6_GREG)
2212 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2213 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2214 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2215# if defined(IEMNATIVE_CALL_ARG7_GREG)
2216 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2217 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2218 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2219# endif
2220# endif
2221# endif
2222#endif
2223};
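
/*
 * Note: how the two tables above are meant to be consumed (a sketch; 'iArg'
 * and 'cArgs' are made-up locals):
 * @code
 *      uint8_t const  idxArgReg = g_aidxIemNativeCallRegs[iArg];   // host GREG holding argument #iArg
 *      uint32_t const fArgRegs  = g_afIemNativeCallRegs[cArgs];    // mask of all GREGs a cArgs-argument call uses
 * @endcode
 */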
2224
2225#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2226/**
2227 * BP offset of the stack argument slots.
2228 *
2229 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2230 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2231 */
2232DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2233{
2234 IEMNATIVE_FP_OFF_STACK_ARG0,
2235# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2236 IEMNATIVE_FP_OFF_STACK_ARG1,
2237# endif
2238# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2239 IEMNATIVE_FP_OFF_STACK_ARG2,
2240# endif
2241# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2242 IEMNATIVE_FP_OFF_STACK_ARG3,
2243# endif
2244};
2245AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2246#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2247
2248/**
2249 * Info about shadowed guest register values.
2250 * @see IEMNATIVEGSTREG
2251 */
2252static struct
2253{
2254 /** Offset in VMCPU. */
2255 uint32_t off;
2256 /** The field size. */
2257 uint8_t cb;
2258 /** Name (for logging). */
2259 const char *pszName;
2260} const g_aGstShadowInfo[] =
2261{
2262#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2263 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2264 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2265 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2266 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2267 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2268 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2269 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2270 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2271 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2272 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2273 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2274 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2275 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2276 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2277 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2278 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2279 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2280 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2281 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2282 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2283 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2284 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2285 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2286 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2287 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2288 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2289 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2290 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2291 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2292 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2293 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2294 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2295 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2296 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2297 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2298 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2299#undef CPUMCTX_OFF_AND_SIZE
2300};
2301AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
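
/*
 * Note: the 'off' member above is a byte offset into VMCPU, so code dealing
 * with a shadowed guest register can be written generically (sketch only; the
 * actual load/store emitters live elsewhere):
 * @code
 *      uint32_t const offVCpu = g_aGstShadowInfo[enmGstReg].off;   // where the guest value lives
 *      uint8_t const  cbField = g_aGstShadowInfo[enmGstReg].cb;    // 2, 4 or 8 bytes
 *      Log12(("shadowing %s: %u bytes at VMCPU+%#x\n", g_aGstShadowInfo[enmGstReg].pszName, cbField, offVCpu));
 * @endcode
 */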
2302
2303
2304/** Host CPU general purpose register names. */
2305DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2306{
2307#ifdef RT_ARCH_AMD64
2308 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2309#elif defined(RT_ARCH_ARM64)
2310 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2311 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2312#else
2313# error "port me"
2314#endif
2315};
2316
2317
2318DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2319 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2320{
2321 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2322
2323 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2324 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2325 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2326 return (uint8_t)idxReg;
2327}
2328
2329
2330/**
2331 * Tries to locate a suitable register in the given register mask.
2332 *
2333 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2334 * failed.
2335 *
2336 * @returns Host register number on success, returns UINT8_MAX on failure.
2337 */
2338static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2339{
2340 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2341 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2342 if (fRegs)
2343 {
2344 /** @todo pick better here: */
2345 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2346
2347 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2348 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2349 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2350 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2351
2352 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2353 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2354 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2355 return idxReg;
2356 }
2357 return UINT8_MAX;
2358}
2359
2360
2361/**
2362 * Locate a register, possibly freeing one up.
2363 *
2364 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2365 * failed.
2366 *
2367 * @returns Host register number on success. Returns UINT8_MAX if no registers
2368 * found, the caller is supposed to deal with this and raise an
2369 * allocation-type specific status code (if desired).
2370 *
2371 * @throws VBox status code if we run into trouble spilling a variable or
2372 * recording debug info. Does NOT throw anything if we're out of
2373 * registers, though.
2374 */
2375static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2376 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2377{
2378 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2379 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2380
2381 /*
2382 * Try a freed register that's shadowing a guest register
2383 */
2384 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2385 if (fRegs)
2386 {
2387 unsigned const idxReg = (fPreferVolatile
2388 ? ASMBitFirstSetU32(fRegs)
2389 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2390 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2391 - 1;
2392
2393 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2394 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2395 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2396 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2397
2398 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2399 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2400 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2401 return idxReg;
2402 }
2403
2404 /*
2405 * Try free up a variable that's in a register.
2406 *
2407 * We do two rounds here, first evacuating variables that don't need to be
2408 * saved on the stack, then in the second round moving things onto the stack.
2409 */
2410 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2411 {
2412 uint32_t fVars = pReNative->Core.bmVars;
2413 while (fVars)
2414 {
2415 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2416 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2417 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2418 && (RT_BIT_32(idxReg) & fRegMask)
2419 && ( iLoop == 0
2420 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2421 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2422 {
2423 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2424 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2425 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2426 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2427 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2428 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2429
2430 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2431 {
2432 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
2433 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2434 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeVarCalcBpDisp(pReNative, idxVar), idxReg);
2435 }
2436
2437 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2438 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2439 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2440 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2441 return idxReg;
2442 }
2443 fVars &= ~RT_BIT_32(idxVar);
2444 }
2445 }
2446
2447 return UINT8_MAX;
2448}
2449
2450
2451/**
2452 * Moves a variable to a different register or spills it onto the stack.
2453 *
2454 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2455 * kinds can easily be recreated if needed later.
2456 *
2457 * @returns The new code buffer position, UINT32_MAX on failure.
2458 * @param pReNative The native recompile state.
2459 * @param off The current code buffer position.
2460 * @param idxVar The variable index.
2461 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2462 * call-volatile registers.
2463 */
2464static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2465 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2466{
2467 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2468 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2469
2470 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2471 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2472 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2473 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2474 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2475 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2476 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2477 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2478 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2479
2480
2481 /** @todo Add statistics on this.*/
2482 /** @todo Implement basic variable liveness analysis (python) so variables
2483 * can be freed immediately once no longer used. Without that we may be
2484 * trashing registers and stack slots for dead variables. */
2485
2486 /*
2487 * First try move it to a different register, as that's cheaper.
2488 */
2489 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2490 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2491 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2492 if (fRegs)
2493 {
2494 /* Avoid using shadow registers, if possible. */
2495 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2496 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2497 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2498 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2499
2500 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2501 Log12(("iemNativeRegMoveOrSpillStackVar: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
2502 idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2503 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2504
2505 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2506 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2507 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2508 if (fGstRegShadows)
2509 {
2510 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2511 while (fGstRegShadows)
2512 {
2513 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2514 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2515
2516 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2517 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2518 }
2519 }
2520
2521 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2522 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2523 }
2524 /*
2525 * Otherwise we must spill the register onto the stack.
2526 */
2527 else
2528 {
2529 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
2530 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
2531 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
2532 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2533 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
2534
2535 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2536 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2537 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2538 }
2539
2540 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2541 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2542 return off;
2543}
2544
2545
2546/**
2547 * Allocates a temporary host general purpose register.
2548 *
2549 * This may emit code to save register content onto the stack in order to free
2550 * up a register.
2551 *
2552 * @returns The host register number; throws VBox status code on failure,
2553 * so no need to check the return value.
2554 * @param pReNative The native recompile state.
2555 * @param poff Pointer to the variable with the code buffer position.
2556 * This will be updated if we need to move a variable from
2557 * register to stack in order to satisfy the request.
2558 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2559 * registers (@c true, default) or the other way around
2560 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2561 */
2562DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2563{
2564 /*
2565 * Try to find a completely unused register, preferably a call-volatile one.
2566 */
2567 uint8_t idxReg;
2568 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2569 & ~pReNative->Core.bmHstRegsWithGstShadow
2570 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2571 if (fRegs)
2572 {
2573 if (fPreferVolatile)
2574 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2575 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2576 else
2577 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2578 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2579 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2580 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2581 }
2582 else
2583 {
2584 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2585 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2586 }
2587 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2588}
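
/*
 * Note: minimal usage sketch.  The matching release function is assumed to be
 * iemNativeRegFreeTmp (defined elsewhere in this file); treat that name as an
 * assumption:
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      // ... emit code that clobbers idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode
 */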
2589
2590
2591/**
2592 * Allocates a temporary register for loading an immediate value into.
2593 *
2594 * This will emit code to load the immediate, unless there happens to be an
2595 * unused register with the value already loaded.
2596 *
2597 * The caller must not modify the returned register; it must be considered
2598 * read-only. Free using iemNativeRegFreeTmpImm.
2599 *
2600 * @returns The host register number; throws VBox status code on failure, so no
2601 * need to check the return value.
2602 * @param pReNative The native recompile state.
2603 * @param poff Pointer to the variable with the code buffer position.
2604 * @param uImm The immediate value that the register must hold upon
2605 * return.
2606 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2607 * registers (@c true, default) or the other way around
2608 * (@c false).
2609 *
2610 * @note Reusing immediate values has not been implemented yet.
2611 */
2612DECL_HIDDEN_THROW(uint8_t)
2613iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2614{
2615 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2616 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2617 return idxReg;
2618}
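
/*
 * Note: usage sketch following the doc comment above - the register is
 * read-only and is returned via iemNativeRegFreeTmpImm (defined elsewhere in
 * this file):
 * @code
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeefcafe));
 *      // ... use idxRegImm strictly as a source operand ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 * @endcode
 */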
2619
2620
2621/**
2622 * Marks host register @a idxHstReg as containing a shadow copy of guest
2623 * register @a enmGstReg.
2624 *
2625 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2626 * host register before calling.
2627 */
2628DECL_FORCE_INLINE(void)
2629iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2630{
2631 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2632 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2633
2634 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2635 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2636 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2637 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2638#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2639 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2640 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2641#else
2642 RT_NOREF(off);
2643#endif
2644}
2645
2646
2647/**
2648 * Clear any guest register shadow claims from @a idxHstReg.
2649 *
2650 * The register does not need to be shadowing any guest registers.
2651 */
2652DECL_FORCE_INLINE(void)
2653iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2654{
2655 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2656 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
2657 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2658 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2659 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2660
2661#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2662 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2663 if (fGstRegs)
2664 {
2665 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
2666 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2667 while (fGstRegs)
2668 {
2669 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2670 fGstRegs &= ~RT_BIT_64(iGstReg);
2671 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2672 }
2673 }
2674#else
2675 RT_NOREF(off);
2676#endif
2677
2678 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2679 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2680 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2681}
2682
2683
2684/**
2685 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
2686 * and global overview flags.
2687 */
2688DECL_FORCE_INLINE(void)
2689iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2690{
2691 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2692 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2693 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
2694 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2695 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
2696 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2697 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2698
2699#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2700 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2701 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
2702#else
2703 RT_NOREF(off);
2704#endif
2705
2706 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
2707 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
2708 if (!fGstRegShadowsNew)
2709 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2710 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
2711}
2712
2713
2714/**
2715 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2716 * to @a idxRegTo.
2717 */
2718DECL_FORCE_INLINE(void)
2719iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2720 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2721{
2722 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2723 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2724 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
2725 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2726 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
2727 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
2728 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2729 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2730
2731 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
2732 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
2733 if (!fGstRegShadowsFrom)
2734 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
2735 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
2736 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
2737 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2738#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2739 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2740 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2741#else
2742 RT_NOREF(off);
2743#endif
2744}
2745
2746
2747/**
2748 * Allocates a temporary host general purpose register for keeping a guest
2749 * register value.
2750 *
2751 * Since we may already have a register holding the guest register value,
2752 * code will be emitted to do the loading if that's not the case. Code may also
2753 * be emitted if we have to free up a register to satisfy the request.
2754 *
2755 * @returns The host register number; throws VBox status code on failure, so no
2756 * need to check the return value.
2757 * @param pReNative The native recompile state.
2758 * @param poff Pointer to the variable with the code buffer
2759 * position. This will be updated if we need to move a
2760 * variable from register to stack in order to satisfy
2761 * the request.
2762 * @param enmGstReg The guest register that is to be updated.
2763 * @param enmIntendedUse How the caller will be using the host register.
2764 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2765 */
2766DECL_HIDDEN_THROW(uint8_t)
2767iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2768 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2769{
2770 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2771#ifdef LOG_ENABLED
2772 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
2773#endif
2774
2775 /*
2776 * First check if the guest register value is already in a host register.
2777 */
2778 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2779 {
2780 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2781 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2782 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2783 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2784
2785 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2786 {
2787 /*
2788 * If the register will trash the guest shadow copy, try find a
2789 * completely unused register we can use instead. If that fails,
2790 * we need to disassociate the host reg from the guest reg.
2791 */
2792 /** @todo would be nice to know if preserving the register is in any way helpful. */
2793 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2794 && ( ~pReNative->Core.bmHstRegs
2795 & ~pReNative->Core.bmHstRegsWithGstShadow
2796 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2797 {
2798 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2799
2800 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2801
2802 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2803 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2804 g_apszIemNativeHstRegNames[idxRegNew]));
2805 idxReg = idxRegNew;
2806 }
2807 else
2808 {
2809 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2810 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2811 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2812 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2813 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2814 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2815 else
2816 {
2817 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2818 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2819 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2820 }
2821 }
2822 }
2823 else
2824 {
2825 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
2826 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
2827 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
2828 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
2829
2830 /*
2831 * Allocate a new register, copy the value and, if updating, the
2832 * guest shadow copy assignment to the new register.
2833 */
2834 /** @todo share register for readonly access. */
2835 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2836
2837 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2838 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2839
2840 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
2841 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2842 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2843 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2844 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2845 else
2846 {
2847 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2848 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
2849 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2850 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2851 }
2852 idxReg = idxRegNew;
2853 }
2854
2855#ifdef VBOX_STRICT
2856 /* Strict builds: Check that the value is correct. */
2857 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2858#endif
2859
2860 return idxReg;
2861 }
2862
2863 /*
2864     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
2865 */
2866 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2867
2868 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2869 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2870
2871 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2872 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2873 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2874 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2875
2876 return idxRegNew;
2877}
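
/* Illustrative usage sketch (hypothetical caller, not part of the recompiler itself): grab a
   host register shadowing guest RAX for updating, emit code with it, then release it again.
   The shadow association survives the free, so later users can reuse the cached value.

       uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                                 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                                 kIemNativeGstRegUse_ForUpdate);
       ... emit instructions operating on idxRegRax ...
       iemNativeRegFreeTmp(pReNative, idxRegRax);
*/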
2878
2879
2880/**
2881 * Allocates a temporary host general purpose register that already holds the
2882 * given guest register value.
2883 *
2884 * The use case for this function is places where the shadowing state cannot be
2885 * modified due to branching and such. This will fail if we don't have a
2886 * current shadow copy handy or if it's incompatible. The only code that will
2887 * be emitted here is value checking code in strict builds.
2888 *
2889 * The intended use can only be readonly!
2890 *
2891 * @returns The host register number, UINT8_MAX if not present.
2892 * @param pReNative The native recompile state.
2893 * @param poff Pointer to the instruction buffer offset.
2894 * Will be updated in strict builds if a register is
2895 * found.
2896 * @param   enmGstReg   The guest register whose shadow copy is wanted (read-only use).
2897 * @note In strict builds, this may throw instruction buffer growth failures.
2898 * Non-strict builds will not throw anything.
2899 * @sa iemNativeRegAllocTmpForGuestReg
2900 */
2901DECL_HIDDEN_THROW(uint8_t)
2902iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2903{
2904 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2905
2906 /*
2907 * First check if the guest register value is already in a host register.
2908 */
2909 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2910 {
2911 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2912 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2913 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2914 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2915
2916 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2917 {
2918 /*
2919 * We only do readonly use here, so easy compared to the other
2920 * variant of this code.
2921 */
2922 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2923 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2924 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2925 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2926 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2927
2928#ifdef VBOX_STRICT
2929 /* Strict builds: Check that the value is correct. */
2930 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2931#else
2932 RT_NOREF(poff);
2933#endif
2934 return idxReg;
2935 }
2936 }
2937
2938 return UINT8_MAX;
2939}
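
/* Illustrative usage sketch (hypothetical caller): since the shadowing state must not be
   touched here, callers have to cope with the lookup failing.

       uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
       if (idxRegEfl != UINT8_MAX)
       {
           ... emit code that only reads idxRegEfl ...
           iemNativeRegFreeTmp(pReNative, idxRegEfl);
       }
       else
       {
           ... emit code that fetches the value some other way, leaving the shadowing state alone ...
       }
*/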
2940
2941
2942DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2943
2944
2945/**
2946 * Allocates argument registers for a function call.
2947 *
2948 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2949 * need to check the return value.
2950 * @param pReNative The native recompile state.
2951 * @param off The current code buffer offset.
2952 * @param cArgs The number of arguments the function call takes.
2953 */
2954DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2955{
2956 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2957 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2958 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2959 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2960
2961 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2962 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2963 else if (cArgs == 0)
2964        return off;
2965
2966 /*
2967     * Do we get lucky and all registers are free and not shadowing anything?
2968 */
2969 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2970 for (uint32_t i = 0; i < cArgs; i++)
2971 {
2972 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2973 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2974 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2975 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2976 }
2977 /*
2978 * Okay, not lucky so we have to free up the registers.
2979 */
2980 else
2981 for (uint32_t i = 0; i < cArgs; i++)
2982 {
2983 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2984 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2985 {
2986 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2987 {
2988 case kIemNativeWhat_Var:
2989 {
2990 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2991 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2992 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2993 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2994 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2995
2996 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2997 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2998 else
2999 {
3000 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3001 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3002 }
3003 break;
3004 }
3005
3006 case kIemNativeWhat_Tmp:
3007 case kIemNativeWhat_Arg:
3008 case kIemNativeWhat_rc:
3009 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3010 default:
3011 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3012 }
3013
3014 }
3015 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3016 {
3017 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3018 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3019 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3020 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3021 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3022 }
3023 else
3024 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3025 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3026 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3027 }
3028 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3029    return off;
3030}
3031
3032
3033DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3034
3035
3036#if 0
3037/**
3038 * Frees a register assignment of any type.
3039 *
3040 * @param pReNative The native recompile state.
3041 * @param idxHstReg The register to free.
3042 *
3043 * @note Does not update variables.
3044 */
3045DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3046{
3047 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3048 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3049 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3050 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3051 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3052 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3053 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3054 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3055 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3056 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3057 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3058 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3059 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3060 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3061
3062 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3063 /* no flushing, right:
3064 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3065 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3066 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3067 */
3068}
3069#endif
3070
3071
3072/**
3073 * Frees a temporary register.
3074 *
3075 * Any shadow copies of guest registers assigned to the host register will not
3076 * be flushed by this operation.
3077 */
3078DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3079{
3080 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3081 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3082 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3083 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3084 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3085}
3086
3087
3088/**
3089 * Frees a temporary immediate register.
3090 *
3091 * It is assumed that the caller has not modified the register, so it still holds
3092 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3093 */
3094DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3095{
3096 iemNativeRegFreeTmp(pReNative, idxHstReg);
3097}
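
/* Illustrative pairing sketch (hypothetical caller; the exact iemNativeRegAllocTmpImm
   signature is an assumption here, only its existence is documented above):

       uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
       ... emit code that only reads idxRegImm ...
       iemNativeRegFreeTmpImm(pReNative, idxRegImm);
*/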
3098
3099
3100/**
3101 * Called right before emitting a call instruction to move anything important
3102 * out of call-volatile registers, free and flush the call-volatile registers,
3103 * optionally freeing argument variables.
3104 *
3105 * @returns New code buffer offset, UINT32_MAX on failure.
3106 * @param pReNative The native recompile state.
3107 * @param off The code buffer offset.
3108 * @param cArgs The number of arguments the function call takes.
3109 * It is presumed that the host registers for these have
3110 * already been allocated as such and won't need moving,
3111 * just freeing.
3112 */
3113DECL_HIDDEN_THROW(uint32_t)
3114iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3115{
3116 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
3117
3118 /*
3119 * Move anything important out of volatile registers.
3120 */
3121 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3122 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3123 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
3124#ifdef IEMNATIVE_REG_FIXED_TMP0
3125 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3126#endif
3127 & ~g_afIemNativeCallRegs[cArgs];
3128
3129 fRegsToMove &= pReNative->Core.bmHstRegs;
3130 if (!fRegsToMove)
3131 { /* likely */ }
3132 else
3133 {
3134 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
3135 while (fRegsToMove != 0)
3136 {
3137 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
3138 fRegsToMove &= ~RT_BIT_32(idxReg);
3139
3140 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3141 {
3142 case kIemNativeWhat_Var:
3143 {
3144 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3145 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
3146 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3147 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3148 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
3149 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
3150 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3151 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3152 else
3153 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3154 continue;
3155 }
3156
3157 case kIemNativeWhat_Arg:
3158 AssertMsgFailed(("What?!?: %u\n", idxReg));
3159 continue;
3160
3161 case kIemNativeWhat_rc:
3162 case kIemNativeWhat_Tmp:
3163 AssertMsgFailed(("Missing free: %u\n", idxReg));
3164 continue;
3165
3166 case kIemNativeWhat_FixedTmp:
3167 case kIemNativeWhat_pVCpuFixed:
3168 case kIemNativeWhat_pCtxFixed:
3169 case kIemNativeWhat_FixedReserved:
3170 case kIemNativeWhat_Invalid:
3171 case kIemNativeWhat_End:
3172 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3173 }
3174 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3175 }
3176 }
3177
3178 /*
3179 * Do the actual freeing.
3180 */
3181 if (pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3182 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n", pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3183 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3184
3185 /* If there are guest register shadows in any call-volatile register, we
3186       have to clear the corresponding guest register masks for each register. */
3187 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3188 if (fHstRegsWithGstShadow)
3189 {
3190 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3191 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
3192 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3193 do
3194 {
3195 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3196 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3197
3198 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
3199 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3200 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3201 } while (fHstRegsWithGstShadow != 0);
3202 }
3203
3204 return off;
3205}
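
/* The typical call emission sequence built on top of this helper looks roughly like the
   following condensed sketch (see iemNativeEmitCImplCall and iemNativeEmitThreadedCall
   further down for the real thing; pfnHelper stands for any C helper):

       iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
       off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
       off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
       ... load the remaining arguments ...
       off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
       off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
*/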
3206
3207
3208/**
3209 * Flushes a set of guest register shadow copies.
3210 *
3211 * This is usually done after calling a threaded function or a C-implementation
3212 * of an instruction.
3213 *
3214 * @param pReNative The native recompile state.
3215 * @param fGstRegs Set of guest registers to flush.
3216 */
3217DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3218{
3219 /*
3220 * Reduce the mask by what's currently shadowed
3221 */
3222 fGstRegs &= pReNative->Core.bmGstRegShadows;
3223 if (fGstRegs)
3224 {
3225 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n",
3226 fGstRegs, pReNative->Core.bmGstRegShadows, pReNative->Core.bmGstRegShadows & ~fGstRegs));
3227 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3228 if (pReNative->Core.bmGstRegShadows)
3229 {
3230 /*
3231 * Partial.
3232 */
3233 do
3234 {
3235 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3236 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3237 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3238 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3239 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3240
3241 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3242 fGstRegs &= ~fInThisHstReg;
3243 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3244 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3245 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3246 } while (fGstRegs != 0);
3247 }
3248 else
3249 {
3250 /*
3251 * Clear all.
3252 */
3253 do
3254 {
3255 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3256 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3257 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3258 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3259 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3260
3261 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3262 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3263 } while (fGstRegs != 0);
3264 pReNative->Core.bmHstRegsWithGstShadow = 0;
3265 }
3266 }
3267}
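
/* Example (illustrative): flushing the shadow copies of RIP, EFLAGS and RSP after emitting
   something that may have changed them behind the register allocator's back:

       iemNativeRegFlushGuestShadows(pReNative,
                                       RT_BIT_64(kIemNativeGstReg_Pc)
                                     | RT_BIT_64(kIemNativeGstReg_EFlags)
                                     | RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP));
*/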
3268
3269
3270/**
3271 * Flushes delayed write of a specific guest register.
3272 *
3273 * This must be called prior to calling CImpl functions and any helpers that use
3274 * the guest state (like raising exceptions) and such.
3275 *
3276 * This optimization has not yet been implemented. The first target would be
3277 * RIP updates, since these are the most common ones.
3278 */
3279DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3280 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
3281{
3282 RT_NOREF(pReNative, enmClass, idxReg);
3283 return off;
3284}
3285
3286
3287/**
3288 * Flushes any delayed guest register writes.
3289 *
3290 * This must be called prior to calling CImpl functions and any helpers that use
3291 * the guest state (like raising exceptions) and such.
3292 *
3293 * This optimization has not yet been implemented. The first target would be
3294 * RIP updates, since these are the most common ones.
3295 */
3296DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3297{
3298 RT_NOREF(pReNative, off);
3299 return off;
3300}
3301
3302
3303#ifdef VBOX_STRICT
3304/**
3305 * Does internal register allocator sanity checks.
3306 */
3307static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
3308{
3309 /*
3310 * Iterate host registers building a guest shadowing set.
3311 */
3312 uint64_t bmGstRegShadows = 0;
3313 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
3314 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
3315 while (bmHstRegsWithGstShadow)
3316 {
3317 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
3318 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3319 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3320
3321 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3322 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
3323 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
3324 bmGstRegShadows |= fThisGstRegShadows;
3325 while (fThisGstRegShadows)
3326 {
3327 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
3328 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
3329 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
3330 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
3331 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
3332 }
3333 }
3334 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
3335 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
3336 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
3337
3338 /*
3339 * Now the other way around, checking the guest to host index array.
3340 */
3341 bmHstRegsWithGstShadow = 0;
3342 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
3343 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3344 while (bmGstRegShadows)
3345 {
3346 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
3347 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3348 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
3349
3350 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3351 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
3352 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
3353 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
3354 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3355 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3356 }
3357 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
3358 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
3359 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
3360}
3361#endif
3362
3363
3364/*********************************************************************************************************************************
3365* Code Emitters (larger snippets) *
3366*********************************************************************************************************************************/
3367
3368/**
3369 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3370 * extending to 64-bit width.
3371 *
3372 * @returns New code buffer offset on success, UINT32_MAX on failure.
3373 * @param   pReNative   The native recompile state.
3374 * @param off The current code buffer position.
3375 * @param idxHstReg The host register to load the guest register value into.
3376 * @param enmGstReg The guest register to load.
3377 *
3378 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3379 * that is something the caller needs to do if applicable.
3380 */
3381DECL_HIDDEN_THROW(uint32_t)
3382iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3383{
3384 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3385 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3386
3387 switch (g_aGstShadowInfo[enmGstReg].cb)
3388 {
3389 case sizeof(uint64_t):
3390 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3391 case sizeof(uint32_t):
3392 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3393 case sizeof(uint16_t):
3394 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3395#if 0 /* not present in the table. */
3396 case sizeof(uint8_t):
3397 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3398#endif
3399 default:
3400 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3401 }
3402}
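
/* Example (illustrative): loading the guest RIP into a scratch register for a one-off
   check without recording it as a shadow copy:

       uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
       off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxTmpReg, kIemNativeGstReg_Pc);
       ... use idxTmpReg ...
       iemNativeRegFreeTmp(pReNative, idxTmpReg);
*/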
3403
3404
3405#ifdef VBOX_STRICT
3406/**
3407 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
3408 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
3409 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3410 * Trashes EFLAGS on AMD64.
3411 */
3412static uint32_t
3413iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
3414{
3415# ifdef RT_ARCH_AMD64
3416 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3417
3418 /* rol reg64, 32 */
3419 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3420 pbCodeBuf[off++] = 0xc1;
3421 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3422 pbCodeBuf[off++] = 32;
3423
3424 /* test reg32, ffffffffh */
3425 if (idxReg >= 8)
3426 pbCodeBuf[off++] = X86_OP_REX_B;
3427 pbCodeBuf[off++] = 0xf7;
3428 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3429 pbCodeBuf[off++] = 0xff;
3430 pbCodeBuf[off++] = 0xff;
3431 pbCodeBuf[off++] = 0xff;
3432 pbCodeBuf[off++] = 0xff;
3433
3434 /* je/jz +1 */
3435 pbCodeBuf[off++] = 0x74;
3436 pbCodeBuf[off++] = 0x01;
3437
3438 /* int3 */
3439 pbCodeBuf[off++] = 0xcc;
3440
3441 /* rol reg64, 32 */
3442 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3443 pbCodeBuf[off++] = 0xc1;
3444 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3445 pbCodeBuf[off++] = 32;
3446
3447# elif defined(RT_ARCH_ARM64)
3448 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3449 /* lsr tmp0, reg64, #32 */
3450 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
3451 /* cbz tmp0, +1 */
3452 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3453 /* brk #0x1100 */
3454 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
3455
3456# else
3457# error "Port me!"
3458# endif
3459 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3460 return off;
3461}
3462#endif /* VBOX_STRICT */
3463
3464
3465#ifdef VBOX_STRICT
3466/**
3467 * Emits code that checks that the content of register @a idxReg is the same
3468 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
3469 * instruction if that's not the case.
3470 *
3471 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3472 * Trashes EFLAGS on AMD64.
3473 */
3474static uint32_t
3475iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3476{
3477# ifdef RT_ARCH_AMD64
3478 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3479
3480 /* cmp reg, [mem] */
3481 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3482 {
3483 if (idxReg >= 8)
3484 pbCodeBuf[off++] = X86_OP_REX_R;
3485 pbCodeBuf[off++] = 0x38;
3486 }
3487 else
3488 {
3489 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3490 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3491 else
3492 {
3493 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3494 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3495 else
3496 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3497 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3498 if (idxReg >= 8)
3499 pbCodeBuf[off++] = X86_OP_REX_R;
3500 }
3501 pbCodeBuf[off++] = 0x39;
3502 }
3503 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3504
3505 /* je/jz +1 */
3506 pbCodeBuf[off++] = 0x74;
3507 pbCodeBuf[off++] = 0x01;
3508
3509 /* int3 */
3510 pbCodeBuf[off++] = 0xcc;
3511
3512 /* For values smaller than the register size, we must check that the rest
3513 of the register is all zeros. */
3514 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3515 {
3516 /* test reg64, imm32 */
3517 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3518 pbCodeBuf[off++] = 0xf7;
3519 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3520 pbCodeBuf[off++] = 0;
3521 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3522 pbCodeBuf[off++] = 0xff;
3523 pbCodeBuf[off++] = 0xff;
3524
3525 /* je/jz +1 */
3526 pbCodeBuf[off++] = 0x74;
3527 pbCodeBuf[off++] = 0x01;
3528
3529 /* int3 */
3530 pbCodeBuf[off++] = 0xcc;
3531 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3532 }
3533 else
3534 {
3535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3536 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3537 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
3538 }
3539
3540# elif defined(RT_ARCH_ARM64)
3541 /* mov TMP0, [gstreg] */
3542 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3543
3544 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3545 /* sub tmp0, tmp0, idxReg */
3546 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3547 /* cbz tmp0, +1 */
3548 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3549 /* brk #0x1000+enmGstReg */
3550 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3552
3553# else
3554# error "Port me!"
3555# endif
3556 return off;
3557}
3558#endif /* VBOX_STRICT */
3559
3560
3561#ifdef VBOX_STRICT
3562/**
3563 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
3564 * important bits.
3565 *
3566 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3567 * Trashes EFLAGS on AMD64.
3568 */
3569static uint32_t
3570iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
3571{
3572 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3573 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
3574 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
3575 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
3576
3577# ifdef RT_ARCH_AMD64
3578 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3579
3580 /* je/jz +1 */
3581 pbCodeBuf[off++] = 0x74;
3582 pbCodeBuf[off++] = 0x01;
3583
3584 /* int3 */
3585 pbCodeBuf[off++] = 0xcc;
3586
3587# elif defined(RT_ARCH_ARM64)
3590
3591 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3592 /* b.eq +1 */
3593 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
3594    /* brk #0x2000 */
3595 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
3596
3597# else
3598# error "Port me!"
3599# endif
3600 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3601
3602 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3603 return off;
3604}
3605#endif /* VBOX_STRICT */
3606
3607
3608/**
3609 * Emits code for checking the return code of a call and rcPassUp, returning
3610 * from the code if either are non-zero.
3611 */
3612DECL_HIDDEN_THROW(uint32_t)
3613iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3614{
3615#ifdef RT_ARCH_AMD64
3616 /*
3617 * AMD64: eax = call status code.
3618 */
3619
3620 /* edx = rcPassUp */
3621 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3622# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3623 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3624# endif
3625
3626 /* edx = eax | rcPassUp */
3627 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3628 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3629 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3630 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3631
3632 /* Jump to non-zero status return path. */
3633 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3634
3635 /* done. */
3636
3637#elif RT_ARCH_ARM64
3638 /*
3639 * ARM64: w0 = call status code.
3640 */
3641 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3642 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3643
3644 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3645
3646 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3647
3648 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3649 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3650 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3651
3652#else
3653# error "port me"
3654#endif
3655 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3656 return off;
3657}
3658
3659
3660/**
3661 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3662 * raising a \#GP(0) if it isn't.
3663 *
3664 * @returns New code buffer offset, UINT32_MAX on failure.
3665 * @param pReNative The native recompile state.
3666 * @param off The code buffer offset.
3667 * @param idxAddrReg The host register with the address to check.
3668 * @param idxInstr The current instruction.
3669 */
3670DECL_HIDDEN_THROW(uint32_t)
3671iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3672{
3673 RT_NOREF(idxInstr);
3674
3675 /*
3676 * Make sure we don't have any outstanding guest register writes as we may
3677 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
3678 */
3679 off = iemNativeRegFlushPendingWrites(pReNative, off);
3680
3681#ifdef RT_ARCH_AMD64
3682 /*
3683 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3684 * return raisexcpt();
3685     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3686 */
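    /* Worked example (illustrative): for 0x0000800000000000 the high dword is 0x00008000;
       adding 0x8000 gives 0x00010000 and shifting right by 16 leaves 1, i.e. not canonical.
       For 0xffff800000000000 the high dword is 0xffff8000; the 32-bit add wraps to 0 and
       the final shift yields 0, i.e. canonical. */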
3687 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3688
3689 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3690 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3691 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3692 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3693
3694# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3695 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3696# else
3697 uint32_t const offFixup = off;
3698 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3699 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3700 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3701 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3702# endif
3703
3704 iemNativeRegFreeTmp(pReNative, iTmpReg);
3705
3706#elif defined(RT_ARCH_ARM64)
3707 /*
3708 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3709 * return raisexcpt();
3710 * ----
3711 * mov x1, 0x800000000000
3712 * add x1, x0, x1
3713 * cmp xzr, x1, lsr 48
3714 * and either:
3715 * b.ne .Lraisexcpt
3716 * or:
3717 * b.eq .Lnoexcept
3718 * movz x1, #instruction-number
3719 * b .Lraisexcpt
3720 * .Lnoexcept:
3721 */
3722 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3723
3724 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3725 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3726    off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3727
3728# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3729 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3730# else
3731 uint32_t const offFixup = off;
3732 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3733 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3734 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3735 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3736# endif
3737
3738 iemNativeRegFreeTmp(pReNative, iTmpReg);
3739
3740#else
3741# error "Port me"
3742#endif
3743 return off;
3744}
3745
3746
3747/**
3748 * Emits code to check if the content of @a idxAddrReg is within the limit of
3749 * idxSegReg, raising a \#GP(0) if it isn't.
3750 *
3751 * @returns New code buffer offset; throws VBox status code on error.
3752 * @param pReNative The native recompile state.
3753 * @param off The code buffer offset.
3754 * @param idxAddrReg The host register (32-bit) with the address to
3755 * check.
3756 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3757 * against.
3758 * @param idxInstr The current instruction.
3759 */
3760DECL_HIDDEN_THROW(uint32_t)
3761iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3762 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3763{
3764 /*
3765 * Make sure we don't have any outstanding guest register writes as we may
3766 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
3767 */
3768 off = iemNativeRegFlushPendingWrites(pReNative, off);
3769
3770 /** @todo implement expand down/whatnot checking */
3771 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3772
3773 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3774 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3775 kIemNativeGstRegUse_ForUpdate);
3776
3777 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3778
3779#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3780 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3781 RT_NOREF(idxInstr);
3782#else
3783 uint32_t const offFixup = off;
3784 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3785 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3786 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3787 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3788#endif
3789
3790 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3791 return off;
3792}
3793
3794
3795/**
3796 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
3797 *
3798 * @returns The flush mask.
3799 * @param fCImpl The IEM_CIMPL_F_XXX flags.
3800 * @param fGstShwFlush The starting flush mask.
3801 */
3802DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
3803{
3804 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
3805 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
3806 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
3807 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
3808 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
3809 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
3810 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
3811 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
3812 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
3813 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
3814 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
3815 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
3816 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3817 return fGstShwFlush;
3818}
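
/* Example (illustrative): for a far branch that also modifies the status flags, the
   resulting mask adds the CS selector/base/limit and EFLAGS shadows to whatever the
   caller started out with:

       uint64_t const fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_STATUS_FLAGS,
                                                                               RT_BIT_64(kIemNativeGstReg_Pc));
*/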
3819
3820
3821/**
3822 * Emits a call to a CImpl function or something similar.
3823 */
3824static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
3825 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3826 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3827{
3828 /*
3829     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
3830 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
3831 */
3832 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
3833 fGstShwFlush
3834 | RT_BIT_64(kIemNativeGstReg_Pc)
3835 | RT_BIT_64(kIemNativeGstReg_EFlags));
3836 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3837
3838 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3839
3840 /*
3841 * Load the parameters.
3842 */
3843#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3844    /* Special handling of the hidden VBOXSTRICTRC return pointer. */
3845 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3846 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3847 if (cAddParams > 0)
3848 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3849 if (cAddParams > 1)
3850 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3851 if (cAddParams > 2)
3852 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3853 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3854
3855#else
3856 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3857 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3858 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3859 if (cAddParams > 0)
3860 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3861 if (cAddParams > 1)
3862 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3863 if (cAddParams > 2)
3864# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3865 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3866# else
3867 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3868# endif
3869#endif
3870
3871 /*
3872 * Make the call.
3873 */
3874 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3875
3876#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3877 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3878#endif
3879
3880 /*
3881 * Check the status code.
3882 */
3883 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3884}
3885
3886
3887/**
3888 * Emits a call to a threaded worker function.
3889 */
3890static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3891{
3892 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3893 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3894 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3895
3896#ifdef RT_ARCH_AMD64
3897 /* Load the parameters and emit the call. */
3898# ifdef RT_OS_WINDOWS
3899# ifndef VBOXSTRICTRC_STRICT_ENABLED
3900 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3901 if (cParams > 0)
3902 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3903 if (cParams > 1)
3904 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3905 if (cParams > 2)
3906 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3907# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3908 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3909 if (cParams > 0)
3910 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3911 if (cParams > 1)
3912 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3913 if (cParams > 2)
3914 {
3915 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3916 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3917 }
3918 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3919# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3920# else
3921 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3922 if (cParams > 0)
3923 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3924 if (cParams > 1)
3925 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3926 if (cParams > 2)
3927 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3928# endif
3929
3930 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3931
3932# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3933 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3934# endif
3935
3936#elif RT_ARCH_ARM64
3937 /*
3938 * ARM64:
3939 */
3940 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3941 if (cParams > 0)
3942 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3943 if (cParams > 1)
3944 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3945 if (cParams > 2)
3946 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3947
3948 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3949
3950#else
3951# error "port me"
3952#endif
3953
3954 /*
3955 * Check the status code.
3956 */
3957 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3958
3959 return off;
3960}
3961
3962
3963/**
3964 * Emits the code at the RaiseGP0 label.
3965 */
3966static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3967{
3968 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3969 if (idxLabel != UINT32_MAX)
3970 {
3971 iemNativeLabelDefine(pReNative, idxLabel, off);
3972
3973 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3974 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3975#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3976 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3977#endif
3978 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3979
3980 /* jump back to the return sequence. */
3981 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3982 }
3983 return off;
3984}
3985
3986
3987/**
3988 * Emits the code at the ReturnWithFlags label (returns
3989 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3990 */
3991static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3992{
3993 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3994 if (idxLabel != UINT32_MAX)
3995 {
3996 iemNativeLabelDefine(pReNative, idxLabel, off);
3997
3998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3999
4000 /* jump back to the return sequence. */
4001 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4002 }
4003 return off;
4004}
4005
4006
4007/**
4008 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
4009 */
4010static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4011{
4012 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
4013 if (idxLabel != UINT32_MAX)
4014 {
4015 iemNativeLabelDefine(pReNative, idxLabel, off);
4016
4017 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
4018
4019 /* jump back to the return sequence. */
4020 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4021 }
4022 return off;
4023}
4024
4025
4026/**
4027 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
4028 */
4029static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
4030{
4031 /*
4032 * Generate the rc + rcPassUp fiddling code if needed.
4033 */
4034 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
4035 if (idxLabel != UINT32_MAX)
4036 {
4037 iemNativeLabelDefine(pReNative, idxLabel, off);
4038
4039 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
4040#ifdef RT_ARCH_AMD64
4041# ifdef RT_OS_WINDOWS
4042# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4043 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
4044# endif
4045 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
4046 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
4047# else
4048 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
4049 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
4050# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4051 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
4052# endif
4053# endif
4054# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4055 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
4056# endif
4057
4058#else
4059 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
4060 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4061 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
4062#endif
4063
4064 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
4065 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
4066 }
4067 return off;
4068}
4069
4070
4071/**
4072 * Emits a standard epilog.
4073 */
4074static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
4075{
4076 *pidxReturnLabel = UINT32_MAX;
4077
4078 /*
4079 * Successful return, so clear the return register (eax, w0).
4080 */
4081    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
4082
4083 /*
4084 * Define label for common return point.
4085 */
4086 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
4087 *pidxReturnLabel = idxReturn;
4088
4089 /*
4090 * Restore registers and return.
4091 */
4092#ifdef RT_ARCH_AMD64
4093 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
4094
4095    /* Reposition rsp at the r15 restore point. */
4096 pbCodeBuf[off++] = X86_OP_REX_W;
4097 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
4098 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
4099 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
4100
4101 /* Pop non-volatile registers and return */
4102 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
4103 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
4104 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
4105 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
4106 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
4107 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
4108 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
4109 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
4110# ifdef RT_OS_WINDOWS
4111 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
4112 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
4113# endif
4114 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
4115 pbCodeBuf[off++] = 0xc9; /* leave */
4116 pbCodeBuf[off++] = 0xc3; /* ret */
4117 pbCodeBuf[off++] = 0xcc; /* int3 poison */
4118
4119#elif RT_ARCH_ARM64
4120 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4121
4122 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
4123 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
4124 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4125 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4126 IEMNATIVE_FRAME_VAR_SIZE / 8);
4127 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
4128 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4129 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4130 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4131 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4132 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4133 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4134 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4135 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4136 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4137 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4138 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4139
4140 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
4141 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
4142 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
4143 IEMNATIVE_FRAME_SAVE_REG_SIZE);
4144
4145 /* retab / ret */
4146# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
4147 if (1)
4148 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
4149 else
4150# endif
4151 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
4152
4153#else
4154# error "port me"
4155#endif
4156 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4157
4158 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
4159}
4160
4161
4162/**
4163 * Emits a standard prolog.
4164 */
4165static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4166{
4167#ifdef RT_ARCH_AMD64
4168 /*
4169 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
4170 * reserving 64 bytes for stack variables plus 4 non-register argument
4171     * slots. Fixed register assignment: xBX = pVCpu;
4172 *
4173 * Since we always do the same register spilling, we can use the same
4174 * unwind description for all the code.
4175 */
4176 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4177 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
4178 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
4179 pbCodeBuf[off++] = 0x8b;
4180 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
4181 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
4182 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
4183# ifdef RT_OS_WINDOWS
4184 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
4185 pbCodeBuf[off++] = 0x8b;
4186 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
4187 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
4188 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
4189# else
4190 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
4191 pbCodeBuf[off++] = 0x8b;
4192 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
4193# endif
4194 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
4195 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
4196 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
4197 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
4198 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
4199 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
4200 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
4201 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
4202
4203 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
4204 X86_GREG_xSP,
4205 IEMNATIVE_FRAME_ALIGN_SIZE
4206 + IEMNATIVE_FRAME_VAR_SIZE
4207 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
4208 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
4209 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
4210 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
4211 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
4212
4213#elif RT_ARCH_ARM64
4214 /*
4215 * We set up a stack frame exactly like on x86, only we have to push the
4216     * return address ourselves here. We save all non-volatile registers.
4217 */
4218 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4219
4220# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we've been
4221                      * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
4222                      * definitely the dwarf stepping code, but until found it's very tedious to figure out whether it's
4223                      * in any way conditional, so just emitting this instruction now and hoping for the best... */
4224 /* pacibsp */
4225 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
4226# endif
4227
4228 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
4229 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
4230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4231 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4232 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
4233 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
4234 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4235 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4236 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4237 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4238 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4239 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4240 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4241 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4242 /* Save the BP and LR (ret address) registers at the top of the frame. */
4243 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4244 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4245 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4246 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
4247 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
4248 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
4249
4250 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
4251 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
4252
4253 /* mov r28, r0 */
4254 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
4255 /* mov r27, r1 */
4256 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
4257
4258#else
4259# error "port me"
4260#endif
4261 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4262 return off;
4263}
4264
4265
4266
4267
4268/*********************************************************************************************************************************
4269* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
4270*********************************************************************************************************************************/
4271
4272#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
4273 { \
4274 Assert(pReNative->Core.bmVars == 0); \
4275 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
4276 Assert(pReNative->Core.bmStack == 0); \
4277 pReNative->fMc = (a_fMcFlags); \
4278 pReNative->fCImpl = (a_fCImplFlags); \
4279 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
4280
4281/** We have to get to the end in recompilation mode, as otherwise we won't
4282 * generate code for all the IEM_MC_IF_XXX branches. */
4283#define IEM_MC_END() \
4284 iemNativeVarFreeAll(pReNative); \
4285 } return off
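/* A hedged usage sketch (names and the flag value are illustrative, not taken
   from this file): the generated per-instruction emitter functions wrap their
   statements in these macros, e.g.

       IEM_MC_BEGIN(0, 0, IEM_MC_F_64BIT, 0);
       IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(cbInstr);
       IEM_MC_END();

   which expands into the state asserts above, the individual statement
   emitters, and finally the iemNativeVarFreeAll() + 'return off' epilogue. */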
4286
4287
4288
4289/*********************************************************************************************************************************
4290* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
4291*********************************************************************************************************************************/
4292
4293#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
4294 pReNative->fMc = 0; \
4295 pReNative->fCImpl = (a_fFlags); \
4296 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
4297
4298
4299#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4300 pReNative->fMc = 0; \
4301 pReNative->fCImpl = (a_fFlags); \
4302 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
4303
4304DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4305 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4306 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
4307{
4308 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
4309}
4310
4311
4312#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4313 pReNative->fMc = 0; \
4314 pReNative->fCImpl = (a_fFlags); \
4315 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4316 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
4317
4318DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4319 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4320 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
4321{
4322 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
4323}
4324
4325
4326#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4327 pReNative->fMc = 0; \
4328 pReNative->fCImpl = (a_fFlags); \
4329 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4330 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
4331
4332DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4333 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4334 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
4335 uint64_t uArg2)
4336{
4337 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
4338}
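/* Hedged sketch of what a deferral expands to inside a generated function body
   (flag and worker names are illustrative only):

       pReNative->fMc    = 0;
       pReNative->fCImpl = IEM_CIMPL_F_XXX;
       return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, fGstShwFlush,
                                      (uintptr_t)iemCImpl_SomeWorker, cbInstr, uArg0, uArg1);

   i.e. the visible arguments are simply forwarded to the common
   iemNativeEmitCImplCall worker, which emits the actual call. */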
4339
4340
4341
4342/*********************************************************************************************************************************
4343* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
4344*********************************************************************************************************************************/
4345
4346/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
4347 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
4348DECL_INLINE_THROW(uint32_t)
4349iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4350{
4351 /*
4352     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
4353     * return with a special status code and make the execution loop deal with
4354     * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
4355     * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
4356     * could continue w/o interruption, it probably will drop into the
4357     * debugger, so it's not worth the effort of trying to service it here and
4358     * we just lump it in with the handling of the others.
4359     *
4360     * To simplify the code and the register state management even more (wrt
4361     * the immediate in the AND operation), we always update the flags and skip
4362     * the extra check and associated conditional jump.
4363 */
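    /* Conceptually, the emitted code does something like this (a sketch only,
       not the actual emitted instructions):

           uint32_t fEfl = <guest EFLAGS shadow register>;
           if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
               goto ReturnWithFlags;                        // special status, handled by the execution loop
           fEfl &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);  // always cleared
           pVCpu->cpum.GstCtx.eflags.u = fEfl;              // stored back unconditionally
    */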
4364 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
4365 <= UINT32_MAX);
4366 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4367 kIemNativeGstRegUse_ForUpdate);
4368 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
4369 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
4370 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
4371 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
4372 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4373
4374 /* Free but don't flush the EFLAGS register. */
4375 iemNativeRegFreeTmp(pReNative, idxEflReg);
4376
4377 return off;
4378}
4379
4380
4381#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4382 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4383
4384#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4385 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4386 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4387
4388/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4389DECL_INLINE_THROW(uint32_t)
4390iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4391{
4392 /* Allocate a temporary PC register. */
4393 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4394
4395 /* Perform the addition and store the result. */
4396 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4397 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4398
4399 /* Free but don't flush the PC register. */
4400 iemNativeRegFreeTmp(pReNative, idxPcReg);
4401
4402 return off;
4403}
4404
4405
4406#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4407 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4408
4409#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4410 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4411 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4412
4413/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4414DECL_INLINE_THROW(uint32_t)
4415iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4416{
4417 /* Allocate a temporary PC register. */
4418 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4419
4420 /* Perform the addition and store the result. */
4421 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4422 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4423
4424 /* Free but don't flush the PC register. */
4425 iemNativeRegFreeTmp(pReNative, idxPcReg);
4426
4427 return off;
4428}
4429
4430
4431#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4432 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4433
4434#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4435 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4436 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4437
4438/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4439DECL_INLINE_THROW(uint32_t)
4440iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4441{
4442 /* Allocate a temporary PC register. */
4443 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4444
4445 /* Perform the addition and store the result. */
4446 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4447 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4448 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4449
4450 /* Free but don't flush the PC register. */
4451 iemNativeRegFreeTmp(pReNative, idxPcReg);
4452
4453 return off;
4454}
4455
4456
4457
4458/*********************************************************************************************************************************
4459* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4460*********************************************************************************************************************************/
4461
4462#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4463 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4464 (a_enmEffOpSize), pCallEntry->idxInstr)
4465
4466#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4467 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4468 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4469
4470#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4471 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4472 IEMMODE_16BIT, pCallEntry->idxInstr)
4473
4474#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4475 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4476 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4477
4478#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4479 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4480 IEMMODE_64BIT, pCallEntry->idxInstr)
4481
4482#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4483 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4484 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4485
4486/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4487 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4488 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4489DECL_INLINE_THROW(uint32_t)
4490iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4491 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4492{
4493 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
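    /* Conceptually (sketch only): uNewRip = rip + cbInstr + offDisp; for a 64-bit
       operand size the result must be canonical or we raise #GP(0) and exit the
       TB; for a 16-bit operand size it is simply truncated to 16 bits. */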
4494
4495 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4496 off = iemNativeRegFlushPendingWrites(pReNative, off);
4497
4498 /* Allocate a temporary PC register. */
4499 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4500
4501 /* Perform the addition. */
4502 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4503
4504 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4505 {
4506 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4507 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4508 }
4509 else
4510 {
4511 /* Just truncate the result to 16-bit IP. */
4512 Assert(enmEffOpSize == IEMMODE_16BIT);
4513 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4514 }
4515 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4516
4517 /* Free but don't flush the PC register. */
4518 iemNativeRegFreeTmp(pReNative, idxPcReg);
4519
4520 return off;
4521}
4522
4523
4524#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4525 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4526 (a_enmEffOpSize), pCallEntry->idxInstr)
4527
4528#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4529 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4530 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4531
4532#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4533 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4534 IEMMODE_16BIT, pCallEntry->idxInstr)
4535
4536#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4537 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4538 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4539
4540#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4541 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4542 IEMMODE_32BIT, pCallEntry->idxInstr)
4543
4544#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4545 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4546 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4547
4548/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4549 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4550 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4551DECL_INLINE_THROW(uint32_t)
4552iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4553 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4554{
4555 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4556
4557 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4558 off = iemNativeRegFlushPendingWrites(pReNative, off);
4559
4560 /* Allocate a temporary PC register. */
4561 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4562
4563 /* Perform the addition. */
4564 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4565
4566 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4567 if (enmEffOpSize == IEMMODE_16BIT)
4568 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4569
4570 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4571 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4572
4573 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4574
4575 /* Free but don't flush the PC register. */
4576 iemNativeRegFreeTmp(pReNative, idxPcReg);
4577
4578 return off;
4579}
4580
4581
4582#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4583 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4584
4585#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4586 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4587 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4588
4589#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4590 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4591
4592#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4593 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4594 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4595
4596#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4597 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4598
4599#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4600 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4601 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4602
4603/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4604DECL_INLINE_THROW(uint32_t)
4605iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4606 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4607{
4608 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4609 off = iemNativeRegFlushPendingWrites(pReNative, off);
4610
4611 /* Allocate a temporary PC register. */
4612 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4613
4614 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4615 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4616 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4617 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4618 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4619
4620 /* Free but don't flush the PC register. */
4621 iemNativeRegFreeTmp(pReNative, idxPcReg);
4622
4623 return off;
4624}
4625
4626
4627
4628/*********************************************************************************************************************************
4629* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4630*********************************************************************************************************************************/
4631
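/* A hedged overview (illustrative only) of how a condition expands in the
   generated native code:

       <IEM_MC_IF_XXX test>      ; jumps to the 'else' label if the condition is false
       ...if-block...
       jmp 'endif' label         ; emitted by IEM_MC_ELSE
   else:
       ...else-block...
   endif:

   If there is no IEM_MC_ELSE, the 'else' label is simply defined at the same
   position as the 'endif' label (see iemNativeEmitEndIf below). */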
4632/**
4633 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4634 *
4635 * @returns Pointer to the condition stack entry on success, NULL on failure
4636 * (too many nestings)
4637 */
4638DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4639{
4640 uint32_t const idxStack = pReNative->cCondDepth;
4641 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4642
4643 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4644 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4645
4646 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4647 pEntry->fInElse = false;
4648 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4649 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4650
4651 return pEntry;
4652}
4653
4654
4655/**
4656 * Start of the if-block, snapshotting the register and variable state.
4657 */
4658DECL_INLINE_THROW(void)
4659iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4660{
4661 Assert(offIfBlock != UINT32_MAX);
4662 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4663 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4664 Assert(!pEntry->fInElse);
4665
4666     /* Define the start of the IF block if requested or for disassembly purposes. */
4667 if (idxLabelIf != UINT32_MAX)
4668 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4669#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4670 else
4671 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4672#else
4673 RT_NOREF(offIfBlock);
4674#endif
4675
4676 /* Copy the initial state so we can restore it in the 'else' block. */
4677 pEntry->InitialState = pReNative->Core;
4678}
4679
4680
4681#define IEM_MC_ELSE() } while (0); \
4682 off = iemNativeEmitElse(pReNative, off); \
4683 do {
4684
4685/** Emits code related to IEM_MC_ELSE. */
4686DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4687{
4688 /* Check sanity and get the conditional stack entry. */
4689 Assert(off != UINT32_MAX);
4690 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4691 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4692 Assert(!pEntry->fInElse);
4693
4694 /* Jump to the endif */
4695 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4696
4697 /* Define the else label and enter the else part of the condition. */
4698 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4699 pEntry->fInElse = true;
4700
4701 /* Snapshot the core state so we can do a merge at the endif and restore
4702 the snapshot we took at the start of the if-block. */
4703 pEntry->IfFinalState = pReNative->Core;
4704 pReNative->Core = pEntry->InitialState;
4705
4706 return off;
4707}
4708
4709
4710#define IEM_MC_ENDIF() } while (0); \
4711 off = iemNativeEmitEndIf(pReNative, off)
4712
4713/** Emits code related to IEM_MC_ENDIF. */
4714DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4715{
4716 /* Check sanity and get the conditional stack entry. */
4717 Assert(off != UINT32_MAX);
4718 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4719 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4720
4721 /*
4722     * Now we have to find common ground with the core state at the end of
4723     * the if-block. Use the smallest common denominator and just drop
4724     * anything that isn't the same in both states.
4725 */
4726 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4727 * which is why we're doing this at the end of the else-block.
4728     *        But we'd need more info about the future for that to be worth the effort. */
4729 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4730 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4731 {
4732 /* shadow guest stuff first. */
4733 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4734 if (fGstRegs)
4735 {
4736 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4737 do
4738 {
4739 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4740 fGstRegs &= ~RT_BIT_64(idxGstReg);
4741
4742 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4743 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4744 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4745 {
4746 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
4747 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4748 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4749 }
4750 } while (fGstRegs);
4751 }
4752 else
4753 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4754
4755 /* Check variables next. For now we must require them to be identical
4756 or stuff we can recreate. */
4757 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4758 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4759 if (fVars)
4760 {
4761 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4762 do
4763 {
4764 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4765 fVars &= ~RT_BIT_32(idxVar);
4766
4767 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4768 {
4769 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4770 continue;
4771 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4772 {
4773 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4774 if (idxHstReg != UINT8_MAX)
4775 {
4776 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4777 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4778 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4779 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4780 }
4781 continue;
4782 }
4783 }
4784 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4785 continue;
4786
4787 /* Irreconcilable, so drop it. */
4788 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4789 if (idxHstReg != UINT8_MAX)
4790 {
4791 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4792 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4793 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4794 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4795 }
4796 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4797 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4798 } while (fVars);
4799 }
4800
4801         /* Finally, check that the host register allocations match. */
4802 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4803 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4804 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4805 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4806 }
4807
4808 /*
4809 * Define the endif label and maybe the else one if we're still in the 'if' part.
4810 */
4811 if (!pEntry->fInElse)
4812 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4813 else
4814 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4815 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4816
4817     /* Pop the conditional stack. */
4818 pReNative->cCondDepth -= 1;
4819
4820 return off;
4821}
4822
4823
4824#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4825 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4826 do {
4827
4828/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4829DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4830{
4831 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4832
4833 /* Get the eflags. */
4834 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4835 kIemNativeGstRegUse_ReadOnly);
4836
4837 /* Test and jump. */
4838 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4839
4840 /* Free but don't flush the EFlags register. */
4841 iemNativeRegFreeTmp(pReNative, idxEflReg);
4842
4843 /* Make a copy of the core state now as we start the if-block. */
4844 iemNativeCondStartIfBlock(pReNative, off);
4845
4846 return off;
4847}
4848
4849
4850#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4851 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4852 do {
4853
4854/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4855DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4856{
4857 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4858
4859 /* Get the eflags. */
4860 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4861 kIemNativeGstRegUse_ReadOnly);
4862
4863 /* Test and jump. */
4864 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4865
4866 /* Free but don't flush the EFlags register. */
4867 iemNativeRegFreeTmp(pReNative, idxEflReg);
4868
4869 /* Make a copy of the core state now as we start the if-block. */
4870 iemNativeCondStartIfBlock(pReNative, off);
4871
4872 return off;
4873}
4874
4875
4876#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4877 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4878 do {
4879
4880/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4881DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4882{
4883 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4884
4885 /* Get the eflags. */
4886 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4887 kIemNativeGstRegUse_ReadOnly);
4888
4889 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4890 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4891
4892 /* Test and jump. */
4893 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4894
4895 /* Free but don't flush the EFlags register. */
4896 iemNativeRegFreeTmp(pReNative, idxEflReg);
4897
4898 /* Make a copy of the core state now as we start the if-block. */
4899 iemNativeCondStartIfBlock(pReNative, off);
4900
4901 return off;
4902}
4903
4904
4905#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4906 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4907 do {
4908
4909/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4910DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4911{
4912 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4913
4914 /* Get the eflags. */
4915 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4916 kIemNativeGstRegUse_ReadOnly);
4917
4918 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4919 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4920
4921 /* Test and jump. */
4922 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4923
4924 /* Free but don't flush the EFlags register. */
4925 iemNativeRegFreeTmp(pReNative, idxEflReg);
4926
4927 /* Make a copy of the core state now as we start the if-block. */
4928 iemNativeCondStartIfBlock(pReNative, off);
4929
4930 return off;
4931}
4932
4933
4934#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4935 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4936 do {
4937
4938#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4939 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4940 do {
4941
4942/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4943DECL_INLINE_THROW(uint32_t)
4944iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4945 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4946{
4947 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4948
4949 /* Get the eflags. */
4950 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4951 kIemNativeGstRegUse_ReadOnly);
4952
4953 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4954 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4955
4956 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4957 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4958 Assert(iBitNo1 != iBitNo2);
4959
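    /* The idea (a sketch, not the emitted code): isolate bit #1, shift it into
       the position of bit #2 and XOR with EFLAGS; bit #2 of the result is then
       set exactly when the two flag bits differ:

           tmp = efl & RT_BIT_32(iBitNo1);
           tmp = iBitNo1 > iBitNo2 ? tmp >> (iBitNo1 - iBitNo2) : tmp << (iBitNo2 - iBitNo1);
           tmp ^= efl;        // bit iBitNo2 is set iff bit1 != bit2
    */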
4960#ifdef RT_ARCH_AMD64
4961 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4962
4963 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4964 if (iBitNo1 > iBitNo2)
4965 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4966 else
4967 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4968 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4969
4970#elif defined(RT_ARCH_ARM64)
4971 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4972 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4973
4974 /* and tmpreg, eflreg, #1<<iBitNo1 */
4975 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4976
4977 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4978 if (iBitNo1 > iBitNo2)
4979 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4980 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4981 else
4982 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4983 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4984
4985 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4986
4987#else
4988# error "Port me"
4989#endif
4990
4991 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4992 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4993 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4994
4995 /* Free but don't flush the EFlags and tmp registers. */
4996 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4997 iemNativeRegFreeTmp(pReNative, idxEflReg);
4998
4999 /* Make a copy of the core state now as we start the if-block. */
5000 iemNativeCondStartIfBlock(pReNative, off);
5001
5002 return off;
5003}
5004
5005
5006#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
5007 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
5008 do {
5009
5010#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
5011 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
5012 do {
5013
5014/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
5015 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
5016DECL_INLINE_THROW(uint32_t)
5017iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
5018 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
5019{
5020 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5021
5022     /* We need an if-block label for the inverted variant. */
5023 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
5024 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
5025
5026 /* Get the eflags. */
5027 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5028 kIemNativeGstRegUse_ReadOnly);
5029
5030 /* Translate the flag masks to bit numbers. */
5031 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5032 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5033
5034 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
5035 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
5036 Assert(iBitNo1 != iBitNo);
5037
5038 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
5039 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
5040 Assert(iBitNo2 != iBitNo);
5041 Assert(iBitNo2 != iBitNo1);
5042
5043#ifdef RT_ARCH_AMD64
5044 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
5045#elif defined(RT_ARCH_ARM64)
5046 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5047#endif
5048
5049 /* Check for the lone bit first. */
5050 if (!fInverted)
5051 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
5052 else
5053 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
5054
5055 /* Then extract and compare the other two bits. */
5056#ifdef RT_ARCH_AMD64
5057 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5058 if (iBitNo1 > iBitNo2)
5059 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
5060 else
5061 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
5062 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
5063
5064#elif defined(RT_ARCH_ARM64)
5065 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5066
5067 /* and tmpreg, eflreg, #1<<iBitNo1 */
5068 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
5069
5070 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
5071 if (iBitNo1 > iBitNo2)
5072 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5073 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
5074 else
5075 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
5076 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
5077
5078 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5079
5080#else
5081# error "Port me"
5082#endif
5083
5084 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
5085 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
5086 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
5087
5088 /* Free but don't flush the EFlags and tmp registers. */
5089 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5090 iemNativeRegFreeTmp(pReNative, idxEflReg);
5091
5092 /* Make a copy of the core state now as we start the if-block. */
5093 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
5094
5095 return off;
5096}
5097
5098
5099#define IEM_MC_IF_CX_IS_NZ() \
5100 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
5101 do {
5102
5103/** Emits code for IEM_MC_IF_CX_IS_NZ. */
5104DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5105{
5106 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5107
5108 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5109 kIemNativeGstRegUse_ReadOnly);
5110 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5111 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5112
5113 iemNativeCondStartIfBlock(pReNative, off);
5114 return off;
5115}
5116
5117
5118#define IEM_MC_IF_ECX_IS_NZ() \
5119 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
5120 do {
5121
5122#define IEM_MC_IF_RCX_IS_NZ() \
5123 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
5124 do {
5125
5126/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
5127DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
5128{
5129 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5130
5131 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5132 kIemNativeGstRegUse_ReadOnly);
5133 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5134 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5135
5136 iemNativeCondStartIfBlock(pReNative, off);
5137 return off;
5138}
5139
5140
5141#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5142 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
5143 do {
5144
5145#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5146 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
5147 do {
5148
5149 /** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
5150DECL_INLINE_THROW(uint32_t)
5151iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
5152{
5153 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5154
5155 /* We have to load both RCX and EFLAGS before we can start branching,
5156 otherwise we'll end up in the else-block with an inconsistent
5157 register allocator state.
5158 Doing EFLAGS first as it's more likely to be loaded, right? */
5159 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5160 kIemNativeGstRegUse_ReadOnly);
5161 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5162 kIemNativeGstRegUse_ReadOnly);
5163
5164 /** @todo we could reduce this to a single branch instruction by spending a
5165 * temporary register and some setnz stuff. Not sure if loops are
5166 * worth it. */
5167 /* Check CX. */
5168 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5169
5170 /* Check the EFlags bit. */
5171 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5172 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5173 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5174 !fCheckIfSet /*fJmpIfSet*/);
5175
5176 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5177 iemNativeRegFreeTmp(pReNative, idxEflReg);
5178
5179 iemNativeCondStartIfBlock(pReNative, off);
5180 return off;
5181}
5182
5183
5184#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5185 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
5186 do {
5187
5188#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5189 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
5190 do {
5191
5192#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5193 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
5194 do {
5195
5196#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5197 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
5198 do {
5199
5200/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
5201 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
5202 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
5203 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
5204DECL_INLINE_THROW(uint32_t)
5205iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5206 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
5207{
5208 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5209
5210 /* We have to load both RCX and EFLAGS before we can start branching,
5211 otherwise we'll end up in the else-block with an inconsistent
5212 register allocator state.
5213 Doing EFLAGS first as it's more likely to be loaded, right? */
5214 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5215 kIemNativeGstRegUse_ReadOnly);
5216 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5217 kIemNativeGstRegUse_ReadOnly);
5218
5219 /** @todo we could reduce this to a single branch instruction by spending a
5220 * temporary register and some setnz stuff. Not sure if loops are
5221 * worth it. */
5222 /* Check RCX/ECX. */
5223 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5224
5225 /* Check the EFlags bit. */
5226 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5227 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5228 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5229 !fCheckIfSet /*fJmpIfSet*/);
5230
5231 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5232 iemNativeRegFreeTmp(pReNative, idxEflReg);
5233
5234 iemNativeCondStartIfBlock(pReNative, off);
5235 return off;
5236}
5237
5238
5239
5240/*********************************************************************************************************************************
5241* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
5242*********************************************************************************************************************************/
5243/** Number of hidden arguments for CIMPL calls.
5244  * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
5245#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5246# define IEM_CIMPL_HIDDEN_ARGS 3
5247#else
5248# define IEM_CIMPL_HIDDEN_ARGS 2
5249#endif
5250
5251#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
5252 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
5253
5254#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
5255 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
5256
5257#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
5258 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
5259
5260#define IEM_MC_LOCAL(a_Type, a_Name) \
5261 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
5262
5263#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
5264 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
5265
5266
5267/**
5268 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
5269 */
5270DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
5271{
5272 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
5273 return IEM_CIMPL_HIDDEN_ARGS;
5274 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
5275 return 1;
5276 return 0;
5277}
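/* Example (hedged): for a block flagged IEM_CIMPL_F_CALLS_CIMPL, a visible
   IEM_MC_ARG(..., 0) thus ends up as native argument #IEM_CIMPL_HIDDEN_ARGS,
   the hidden slots presumably being pVCpu and cbInstr, plus the VBOXSTRICTRC
   return buffer in the Windows/AMD64 case noted above. */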
5278
5279
5280/**
5281 * Internal work that allocates a variable with kind set to
5282 * kIemNativeVarKind_Invalid and no current stack allocation.
5283 *
5284 * The kind will either be set by the caller or later when the variable is first
5285 * assigned a value.
5286 */
5287static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5288{
5289 Assert(cbType > 0 && cbType <= 64);
5290 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
5291 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
5292 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
5293 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5294 pReNative->Core.aVars[idxVar].cbVar = cbType;
5295 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5296 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5297 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
5298 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
5299 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
5300 pReNative->Core.aVars[idxVar].u.uValue = 0;
5301 return idxVar;
5302}
5303
5304
5305/**
5306 * Internal work that allocates an argument variable w/o setting enmKind.
5307 */
5308static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5309{
5310 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
5311 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5312 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
5313
5314 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5315 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
5316 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
5317 return idxVar;
5318}
5319
5320
5321/**
5322 * Changes the variable to a stack variable.
5323 *
5324 * Currently this is s only possible to do the first time the variable is used,
5325  * Currently this is only possible to do the first time the variable is used;
5326  * switching later can be implemented but hasn't been done yet.
5327 * @param pReNative The recompiler state.
5328 * @param idxVar The variable.
5329 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5330 */
5331static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5332{
5333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5334 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5335 {
5336 /* We could in theory transition from immediate to stack as well, but it
5337 would involve the caller doing work storing the value on the stack. So,
5338 till that's required we only allow transition from invalid. */
5339 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5340 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5341 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5342 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
5343
5344 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
5345 {
5346 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
5347 {
5348 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
5349 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5350 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
5351 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5352 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
5353 return;
5354 }
5355 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;*/
5356 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
5357 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
5358 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
5359 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
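            /* Worked example: for a 32 byte variable, fBitAlignMask = 3 (the slot
               index must be a multiple of 4) and fBitAllocMask = 0xf (four
               consecutive 8-byte slots), so the loop below searches bmStack for
               four free slots starting at a 4-slot aligned index. */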
5360 uint32_t bmStack = ~pReNative->Core.bmStack;
5361 while (bmStack != UINT32_MAX)
5362 {
5363 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
5364 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5365 if (!(iSlot & fBitAlignMask))
5366 {
5367 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
5368 {
5369 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
5370 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5371 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
5372 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
5373 return;
5374 }
5375 }
5376 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
5377 }
5378 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5379 }
5380 }
5381}
5382
5383
5384/**
5385 * Sets it to a variable with a constant value.
5386 *
5387 * This does not require stack storage as we know the value and can always
5388 * reload it, unless of course it's referenced.
5389 *
5390 * @param pReNative The recompiler state.
5391 * @param idxVar The variable.
5392 * @param uValue The immediate value.
5393 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5394 */
5395static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5396{
5397 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5398 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5399 {
5400 /* Only simple transitions for now. */
5401 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5402 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5403 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5404 }
5405 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5406
5407 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5408}
5409
5410
5411/**
5412 * Sets the variable to a reference (pointer) to @a idxOtherVar.
5413 *
5414 * This does not require stack storage as we know the value and can always
5415 * reload it. Loading is postponed till needed.
5416 *
5417 * @param pReNative The recompiler state.
5418 * @param idxVar The variable.
5419 * @param idxOtherVar The variable to take the (stack) address of.
5420 *
5421 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5422 */
5423static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5424{
5425 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5426 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5427
5428 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5429 {
5430 /* Only simple transitions for now. */
5431 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5432 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5433 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5434 }
5435 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5436
5437 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5438
5439 /* Update the other variable, ensure it's a stack variable. */
5440 /** @todo handle variables with const values... that'll go boom now. */
5441 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5442 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5443}
5444
5445
5446/**
5447 * Sets the variable to a reference (pointer) to a guest register reference.
5448 *
5449 * This does not require stack storage as we know the value and can always
5450 * reload it. Loading is postponed till needed.
5451 *
5452 * @param pReNative The recompiler state.
5453 * @param idxVar The variable.
5454  * @param   enmRegClass     The class of guest registers to reference.
5455 * @param idxReg The register within @a enmRegClass to reference.
5456 *
5457 * @throws VERR_IEM_VAR_IPE_2
5458 */
5459static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
5460 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
5461{
5462 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5463
5464 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
5465 {
5466 /* Only simple transitions for now. */
5467 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5468 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5469 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
5470 }
5471 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5472
5473 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
5474 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
5475}
5476
5477
5478DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5479{
5480 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5481}
5482
5483
5484DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5485{
5486 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5487 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5488 return idxVar;
5489}
5490
5491
5492DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5493{
5494 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5495 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5496 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5497 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5498
5499 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5500 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5501 return idxArgVar;
5502}
5503
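/*
 * Illustrative flow (sketch only, with hypothetical variable indexes): an
 * argument that is a pointer to a local variable, as produced by
 * IEM_MC_ARG_LOCAL_REF style code:
 *
 *     uint8_t const idxVarLocal = iemNativeVarAlloc(pReNative, sizeof(uint64_t));
 *     uint8_t const idxVarArg1  = iemNativeArgAllocLocalRef(pReNative, 1, idxVarLocal);
 *
 * This forces idxVarLocal to the 'stack' kind (see iemNativeVarSetKindToLocalRef
 * above), and iemNativeEmitCallCommon below will spill it out of any host
 * register and pass its frame address (via lea) as argument #1.
 */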
5504
5505DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5506{
5507 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5508 /* Don't set it to stack now; leave that to the first use, since for instance
5509 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
5510 return idxVar;
5511}
5512
5513
5514DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5515{
5516 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5517 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5518 return idxVar;
5519}
5520
5521
5522/**
5523 * Makes sure variable @a idxVar has a register assigned to it.
5524 *
5525 * @returns The host register number.
5526 * @param pReNative The recompiler state.
5527 * @param idxVar The variable.
5528 * @param poff Pointer to the instruction buffer offset.
5529 * In case a register needs to be freed up.
5530 */
5531DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5532{
5533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5534 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
5535
5536 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5537 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5538 {
5539 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
5540 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5541 return idxReg;
5542 }
5543
5544 /*
5545 * If the kind of variable has not yet been set, default to 'stack'.
5546 */
5547 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
5548 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5549 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
5550 iemNativeVarSetKindToStack(pReNative, idxVar);
5551
5552 /*
5553 * We have to allocate a register for the variable, even if it's a stack one,
5554 * as we don't know whether modifications are being made to it before it is
5555 * finalized (todo: analyze and insert hints about that?).
5556 *
5557 * If we can, we try to get the correct register for argument variables. This
5558 * assumes that most argument variables are fetched as close as possible
5559 * to the actual call, so that there aren't any interfering hidden calls
5560 * (memory accesses, etc.) in between.
5561 *
5562 * If we cannot, or it's a local variable, we make sure no argument registers
5563 * that will be used by this MC block are allocated here, and we always
5564 * prefer non-volatile registers to avoid having to spill stuff for internal
5565 * calls.
5566 */
5567 /** @todo Have the python script detect too early argument value fetches and
5568 * warn about hidden calls causing less optimal code to be generated. */
5569
5570 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5571 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5572 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5573 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5574 else
5575 {
5576 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5577 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5578 & ~pReNative->Core.bmHstRegsWithGstShadow
5579 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5580 & fNotArgsMask;
5581 if (fRegs)
5582 {
5583 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
5584 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5585 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5586 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5587 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5588 }
5589 else
5590 {
5591 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5592 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5593 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5594 }
5595 }
5596 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5597 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5598 return idxReg;
5599}
5600
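/*
 * Illustrative sketch of the "pick from the top, prefer non-volatile" selection
 * above, using made-up mask values rather than the real IEMNATIVE_* constants:
 *
 *     uint32_t const fFree     = UINT32_C(0x0000f0c0);  // free, non-fixed, non-argument regs
 *     uint32_t const fVolatile = UINT32_C(0x000000ff);  // hypothetical volatile register set
 *     uint32_t const fPick     = fFree & ~fVolatile ? fFree & ~fVolatile : fFree;
 *     uint8_t  const idxReg    = (uint8_t)ASMBitLastSetU32(fPick) - 1;   // -> host register 15
 */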
5601
5602/**
5603 * The value of variable @a idxVar will be written in full to the @a enmGstReg
5604 * guest register.
5605 *
5606 * This function makes sure there is a register for it and sets it to be the
5607 * current shadow copy of @a enmGstReg.
5608 *
5609 * @returns The host register number.
5610 * @param pReNative The recompiler state.
5611 * @param idxVar The variable.
5612 * @param enmGstReg The guest register this variable will be written to
5613 * after this call.
5614 * @param poff Pointer to the instruction buffer offset.
5615 * In case a register needs to be freed up or if the
5616 * variable content needs to be loaded off the stack.
5617 *
5618 * @note We DO NOT expect @a idxVar to be an argument variable,
5619 * because this function is only used in the commit stage of an
5620 * instruction.
5621 */
5622DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegisterForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
5623 IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
5624{
5625 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5626 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
5627 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
5628 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
5629 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
5630 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
5631 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
5632
5633 /*
5634 * This shouldn't ever be used for arguments, unless it's in a weird else
5635 * branch that doesn't do any calling and even then it's questionable.
5636 *
5637 * However, in case someone writes crazy wrong MC code and does register
5638 * updates before making calls, just use the regular register allocator to
5639 * ensure we get a register suitable for the intended argument number.
5640 */
5641 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarAllocRegister(pReNative, idxVar, poff));
5642
5643 /*
5644 * If there is already a register for the variable, we transfer/set the
5645 * guest shadow copy assignment to it.
5646 */
5647 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5648 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5649 {
5650 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
5651 {
5652 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
5653 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
5654 Log12(("iemNativeVarAllocRegisterForGuestReg: Moved %s for guest %s into %s for full write\n",
5655 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
5656 }
5657 else
5658 {
5659 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
5660 Log12(("iemNativeVarAllocRegisterForGuestReg: Marking %s as copy of guest %s (full write)\n",
5661 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
5662 }
5663 /** @todo figure this one out. We need some way of making sure the register isn't
5664 * modified after this point, just in case we start writing crappy MC code. */
5665 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
5666 return idxReg;
5667 }
5668 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
5669
5670 /*
5671 * Because this is supposed to be the commit stage, we just tag along with the
5672 * temporary register allocator and upgrade it to a variable register.
5673 */
5674 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
5675 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
5676 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
5677 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
5678 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
5679 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5680
5681 /*
5682 * Now we need to load the register value.
5683 */
5684 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
5685 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
5686 else
5687 {
5688 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5689 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_7));
5690 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
5691 switch (pReNative->Core.aVars[idxVar].cbVar)
5692 {
5693 case sizeof(uint64_t):
5694 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
5695 break;
5696 case sizeof(uint32_t):
5697 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
5698 break;
5699 case sizeof(uint16_t):
5700 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
5701 break;
5702 case sizeof(uint8_t):
5703 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
5704 break;
5705 default:
5706 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
5707 }
5708 }
5709
5710 return idxReg;
5711}
5712
5713
5714/**
5715 * Sets the host register for @a idxVarRc to @a idxReg.
5716 *
5717 * The register must not be allocated. Any guest register shadowing will be
5718 * implicitly dropped by this call.
5719 *
5720 * The variable must not have any register associated with it (causes
5721 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
5722 * implied.
5723 *
5724 * @returns idxReg
5725 * @param pReNative The recompiler state.
5726 * @param idxVar The variable.
5727 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
5728 * @param off For recording in debug info.
5729 *
5730 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
5731 */
5732DECL_INLINE_THROW(uint8_t) iemNativeVarSetRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
5733{
5734 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5735 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5736 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
5737 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
5738
5739 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
5740 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5741
5742 iemNativeVarSetKindToStack(pReNative, idxVar);
5743 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5744
5745 return idxReg;
5746}
5747
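/*
 * Typical use (see iemNativeEmitCallAImplCommon below): capturing a call's
 * return value in the variable backing the a_rc parameter of IEM_MC_CALL_AIMPL_N:
 *
 *     off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 *     iemNativeVarSetRegister(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
 */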
5748
5749/**
5750 * Worker that frees the stack slots for variable @a idxVar if any allocated.
5751 *
5752 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
5753 */
5754DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5755{
5756 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5757 Assert(idxStackSlot == UINT8_MAX || idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
5758 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
5759 {
5760 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
5761 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
5762 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
5763 Assert(cSlots > 0);
5764 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
5765 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
5766 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
5767 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5768 }
5769}
5770
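/*
 * Worked example (illustrative only): freeing a 16-byte variable that was
 * allocated stack slots 4 and 5:
 *
 *     cbVar       = 16
 *     cSlots      = (16 + 8 - 1) / 8      = 2
 *     fAllocMask  = RT_BIT_32(2) - 1      = 0x3
 *     bmStack    &= ~(0x3 << 4)             -> clears bits 5:4
 */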
5771
5772/**
5773 * Worker that frees a single variable.
5774 *
5775 * ASSUMES that @a idxVar is valid.
5776 */
5777DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5778{
5779 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
5780 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5781
5782 /* Free the host register first if any assigned. */
5783 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5784 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5785 {
5786 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5787 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5788 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5789 }
5790
5791 /* Free argument mapping. */
5792 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5793 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
5794 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
5795
5796 /* Free the stack slots. */
5797 iemNativeVarFreeStackSlots(pReNative, idxVar);
5798
5799 /* Free the actual variable. */
5800 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5801 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5802}
5803
5804
5805/**
5806 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
5807 */
5808DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
5809{
5810 while (bmVars != 0)
5811 {
5812 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
5813 bmVars &= ~RT_BIT_32(idxVar);
5814
5815#if 1 /** @todo optimize by simplifying this later... */
5816 iemNativeVarFreeOneWorker(pReNative, idxVar);
5817#else
5818 /* Only need to free the host register, the rest is done as bulk updates below. */
5819 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5820 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5821 {
5822 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5823 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5824 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5825 }
5826#endif
5827 }
5828#if 0 /** @todo optimize by simplifying this later... */
5829 pReNative->Core.bmVars = 0;
5830 pReNative->Core.bmStack = 0;
5831 pReNative->Core.u64ArgVars = UINT64_MAX;
5832#endif
5833}
5834
5835
5836/**
5837 * This is called by IEM_MC_END() to clean up all variables.
5838 */
5839DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
5840{
5841 uint32_t const bmVars = pReNative->Core.bmVars;
5842 if (bmVars != 0)
5843 iemNativeVarFreeAllSlow(pReNative, bmVars);
5844 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5845 Assert(pReNative->Core.bmStack == 0);
5846}
5847
5848
5849#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
5850
5851/**
5852 * This is called by IEM_MC_FREE_LOCAL.
5853 */
5854DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5855{
5856 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5857 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
5858 iemNativeVarFreeOneWorker(pReNative, idxVar);
5859}
5860
5861
5862#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
5863
5864/**
5865 * This is called by IEM_MC_FREE_ARG.
5866 */
5867DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5868{
5869 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5870 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
5871 iemNativeVarFreeOneWorker(pReNative, idxVar);
5872}
5873
5874
5875
5876/*********************************************************************************************************************************
5877* Emitters for IEM_MC_CALL_CIMPL_XXX *
5878*********************************************************************************************************************************/
5879
5880/**
5881 * Emits code to load a reference to the given guest register into @a idxGprDst.
5882 */
5883DECL_INLINE_THROW(uint32_t)
5884iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5885 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5886{
5887 /*
5888 * Get the offset relative to the CPUMCTX structure.
5889 */
5890 uint32_t offCpumCtx;
5891 switch (enmClass)
5892 {
5893 case kIemNativeGstRegRef_Gpr:
5894 Assert(idxRegInClass < 16);
5895 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5896 break;
5897
5898 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
5899 Assert(idxRegInClass < 4);
5900 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5901 break;
5902
5903 case kIemNativeGstRegRef_EFlags:
5904 Assert(idxRegInClass == 0);
5905 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5906 break;
5907
5908 case kIemNativeGstRegRef_MxCsr:
5909 Assert(idxRegInClass == 0);
5910 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5911 break;
5912
5913 case kIemNativeGstRegRef_FpuReg:
5914 Assert(idxRegInClass < 8);
5915 AssertFailed(); /** @todo what kind of indexing? */
5916 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5917 break;
5918
5919 case kIemNativeGstRegRef_MReg:
5920 Assert(idxRegInClass < 8);
5921 AssertFailed(); /** @todo what kind of indexing? */
5922 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5923 break;
5924
5925 case kIemNativeGstRegRef_XReg:
5926 Assert(idxRegInClass < 16);
5927 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5928 break;
5929
5930 default:
5931 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5932 }
5933
5934 /*
5935 * Load the value into the destination register.
5936 */
5937#ifdef RT_ARCH_AMD64
5938 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5939
5940#elif defined(RT_ARCH_ARM64)
5941 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5942 Assert(offCpumCtx < 4096);
5943 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5944
5945#else
5946# error "Port me!"
5947#endif
5948
5949 return off;
5950}
5951
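/*
 * Worked example (sketch): referencing guest RBX, i.e. enmClass set to
 * kIemNativeGstRegRef_Gpr and idxRegInClass = 3:
 *
 *     offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]);
 *     // AMD64:  lea idxGprDst, [pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx]
 *     // ARM64:  add idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, #offCpumCtx
 */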
5952
5953/**
5954 * Common code for CIMPL and AIMPL calls.
5955 *
5956 * These are calls that use argument variables and such. They should not be
5957 * confused with internal calls required to implement an MC operation,
5958 * like a TLB load and similar.
5959 *
5960 * Upon return all that is left to do is to load any hidden arguments and
5961 * perform the call. All argument variables are freed.
5962 *
5963 * @returns New code buffer offset; throws VBox status code on error.
5964 * @param pReNative The native recompile state.
5965 * @param off The code buffer offset.
5966 * @param cArgs The total number of arguments (includes hidden
5967 * count).
5968 * @param cHiddenArgs The number of hidden arguments. The hidden
5969 * arguments must not have any variable declared for
5970 * them, whereas all the regular arguments must
5971 * (tstIEMCheckMc ensures this).
5972 */
5973DECL_HIDDEN_THROW(uint32_t)
5974iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5975{
5976#ifdef VBOX_STRICT
5977 /*
5978 * Assert sanity.
5979 */
5980 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5981 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5982 for (unsigned i = 0; i < cHiddenArgs; i++)
5983 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5984 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5985 {
5986 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5987 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5988 }
5989#endif
5990
5991 /*
5992 * Before we do anything else, go over variables that are referenced and
5993 * make sure they are not in a register.
5994 */
5995 uint32_t bmVars = pReNative->Core.bmVars;
5996 if (bmVars)
5997 do
5998 {
5999 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
6000 bmVars &= ~RT_BIT_32(idxVar);
6001
6002 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
6003 {
6004 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
6005 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
6006 {
6007 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
6008 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
6009 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
6010 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
6011 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
6012 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
6013
6014 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6015 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
6016 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
6017 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
6018 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
6019 }
6020 }
6021 } while (bmVars != 0);
6022
6023 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
6024
6025 /*
6026 * First, go over the host registers that will be used for arguments and make
6027 * sure they either hold the desired argument or are free.
6028 */
6029 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
6030 for (uint32_t i = 0; i < cRegArgs; i++)
6031 {
6032 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
6033 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
6034 {
6035 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
6036 {
6037 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
6038 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
6039 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
6040 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
6041 if (uArgNo == i)
6042 { /* perfect */ }
6043 else
6044 {
6045 /* The variable allocator logic should make sure this is impossible. */
6046 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
6047
6048 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6049 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
6050 else
6051 {
6052 /* just free it, can be reloaded if used again */
6053 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6054 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
6055 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
6056 }
6057 }
6058 }
6059 else
6060 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
6061 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
6062 }
6063 }
6064
6065 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
6066
6067#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
6068 /*
6069 * If there are any stack arguments, make sure they are in their place as well.
6070 *
6071 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
6072 * the caller) will be loading it later and it must be free (see the first loop).
6073 */
6074 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
6075 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
6076 {
6077 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6078 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
6079 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6080 {
6081 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
6082 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
6083 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
6084 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6085 }
6086 else
6087 {
6088 /* Use ARG0 as temp for stuff we need registers for. */
6089 switch (pReNative->Core.aVars[idxVar].enmKind)
6090 {
6091 case kIemNativeVarKind_Stack:
6092 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
6093 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6094 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
6095 iemNativeVarCalcBpDisp(pReNative, idxVar));
6096 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6097 continue;
6098
6099 case kIemNativeVarKind_Immediate:
6100 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
6101 continue;
6102
6103 case kIemNativeVarKind_VarRef:
6104 {
6105 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
6106 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
6107 AssertStmt( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
6108 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX,
6109 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
6110 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
6111 iemNativeStackCalcBpDisp(pReNative->Core.aVars[idxOtherVar].idxStackSlot));
6112 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6113 continue;
6114 }
6115
6116 case kIemNativeVarKind_GstRegRef:
6117 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
6118 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
6119 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
6120 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
6121 continue;
6122
6123 case kIemNativeVarKind_Invalid:
6124 case kIemNativeVarKind_End:
6125 break;
6126 }
6127 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6128 }
6129 }
6130#else
6131 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
6132#endif
6133
6134 /*
6135 * Make sure the argument variables are loaded into their respective registers.
6136 *
6137 * We can optimize this by ASSUMING that any register allocations are for
6138 * registers that have already been loaded and are ready. The previous step
6139 * saw to that.
6140 */
6141 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
6142 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
6143 {
6144 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
6145 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
6146 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
6147 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
6148 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
6149 else
6150 {
6151 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6152 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6153 {
6154 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
6155 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
6156 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
6157 | RT_BIT_32(idxArgReg);
6158 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
6159 }
6160 else
6161 {
6162 /* Use ARG0 as temp for stuff we need registers for. */
6163 switch (pReNative->Core.aVars[idxVar].enmKind)
6164 {
6165 case kIemNativeVarKind_Stack:
6166 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
6167 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6168 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeVarCalcBpDisp(pReNative, idxVar));
6169 continue;
6170
6171 case kIemNativeVarKind_Immediate:
6172 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
6173 continue;
6174
6175 case kIemNativeVarKind_VarRef:
6176 {
6177 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
6178 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
6179 AssertStmt( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
6180 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX,
6181 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
6182 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
6183 iemNativeStackCalcBpDisp(pReNative->Core.aVars[idxOtherVar].idxStackSlot));
6184 continue;
6185 }
6186
6187 case kIemNativeVarKind_GstRegRef:
6188 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
6189 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
6190 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
6191 continue;
6192
6193 case kIemNativeVarKind_Invalid:
6194 case kIemNativeVarKind_End:
6195 break;
6196 }
6197 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
6198 }
6199 }
6200 }
6201#ifdef VBOX_STRICT
6202 else
6203 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
6204 {
6205 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
6206 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
6207 }
6208#endif
6209
6210 /*
6211 * Free all argument variables (simplified).
6212 * Their lifetime always expires with the call they are for.
6213 */
6214 /** @todo Make the python script check that arguments aren't used after
6215 * IEM_MC_CALL_XXXX. */
6216 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
6217 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
6218 * an argument value. There is also some FPU stuff. */
6219 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
6220 {
6221 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6222 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
6223
6224 /* no need to free registers: */
6225 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
6226 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
6227 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
6228 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
6229 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
6230 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
6231
6232 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
6233 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6234 iemNativeVarFreeStackSlots(pReNative, idxVar);
6235 }
6236 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
6237
6238 /*
6239 * Flush volatile registers as we make the call.
6240 */
6241 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
6242
6243 return off;
6244}
6245
6246
6247/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
6248DECL_HIDDEN_THROW(uint32_t)
6249iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
6250 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
6251
6252{
6253 /*
6254 * Do all the call setup and cleanup.
6255 */
6256 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
6257
6258 /*
6259 * Load the two or three hidden arguments.
6260 */
6261#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6262 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6263 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6264 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
6265#else
6266 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6267 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
6268#endif
6269
6270 /*
6271 * Make the call and check the return code.
6272 *
6273 * Shadow PC copies are always flushed here; other stuff depends on flags.
6274 * Segment and general purpose registers are explicitly flushed via the
6275 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
6276 * macros.
6277 */
6278 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
6279#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6280 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6281#endif
6282 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
6283 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
6284 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6285 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6286
6287 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6288}
6289
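/*
 * Sketch of the resulting call (normal, non-strict layout with two hidden
 * arguments): for IEM_MC_CALL_CIMPL_1 below, the generated code effectively does
 *
 *     rcStrict = pfnCImpl(pVCpu, cbInstr, a0);
 *
 * where pVCpu and cbInstr are the hidden arguments loaded above and a0 is the
 * caller's argument variable at position IEM_CIMPL_HIDDEN_ARGS.
 */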
6290
6291#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6292 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
6293
6294/** Emits code for IEM_MC_CALL_CIMPL_1. */
6295DECL_INLINE_THROW(uint32_t)
6296iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6297 uintptr_t pfnCImpl, uint8_t idxArg0)
6298{
6299 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6300 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
6301}
6302
6303
6304#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6305 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
6306
6307/** Emits code for IEM_MC_CALL_CIMPL_2. */
6308DECL_INLINE_THROW(uint32_t)
6309iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6310 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
6311{
6312 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6313 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6314 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
6315}
6316
6317
6318#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6319 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6320 (uintptr_t)a_pfnCImpl, a0, a1, a2)
6321
6322/** Emits code for IEM_MC_CALL_CIMPL_3. */
6323DECL_INLINE_THROW(uint32_t)
6324iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6325 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
6326{
6327 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6328 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6329 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6330 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
6331}
6332
6333
6334#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
6335 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6336 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
6337
6338/** Emits code for IEM_MC_CALL_CIMPL_4. */
6339DECL_INLINE_THROW(uint32_t)
6340iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6341 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
6342{
6343 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6344 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6345 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6346 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
6347 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
6348}
6349
6350
6351#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
6352 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6353 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
6354
6355/** Emits code for IEM_MC_CALL_CIMPL_5. */
6356DECL_INLINE_THROW(uint32_t)
6357iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6358 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
6359{
6360 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6361 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6362 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6363 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
6364 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
6365 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
6366}
6367
6368
6369/** Recompiler debugging: Flush guest register shadow copies. */
6370#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
6371
6372
6373
6374/*********************************************************************************************************************************
6375* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
6376*********************************************************************************************************************************/
6377
6378/**
6379 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
6380 */
6381DECL_INLINE_THROW(uint32_t)
6382iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6383 uintptr_t pfnAImpl, uint8_t cArgs)
6384{
6385 if (idxVarRc != UINT8_MAX)
6386 {
6387 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
6388 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
6389 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
6390 }
6391
6392 /*
6393 * Do all the call setup and cleanup.
6394 */
6395 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
6396
6397 /*
6398 * Make the call and update the return code variable if we've got one.
6399 */
6400 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
6401 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
6402 {
6403 pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
6404 iemNativeVarSetRegister(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
6405 }
6406
6407 return off;
6408}
6409
6410
6411
6412#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
6413 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
6414
6415#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
6416 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
6417
6418/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
6419DECL_INLINE_THROW(uint32_t)
6420iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
6421{
6422 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
6423}
6424
6425
6426#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
6427 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
6428
6429#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
6430 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
6431
6432/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
6433DECL_INLINE_THROW(uint32_t)
6434iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
6435{
6436 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6437 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
6438}
6439
6440
6441#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
6442 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
6443
6444#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
6445 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
6446
6447/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
6448DECL_INLINE_THROW(uint32_t)
6449iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6450 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
6451{
6452 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6453 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6454 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
6455}
6456
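/*
 * Illustrative expansion (hypothetical worker and variable names): a threaded
 * function body containing
 *
 *     IEM_MC_CALL_AIMPL_2(rcVar, iemAImpl_example_u32, arg0Var, arg1Var);
 *
 * becomes
 *
 *     off = iemNativeEmitCallAImpl2(pReNative, off, rcVar, (uintptr_t)(iemAImpl_example_u32), arg0Var, arg1Var);
 *
 * which asserts that arg0Var/arg1Var are argument variables 0 and 1 and defers
 * the rest to iemNativeEmitCallAImplCommon above.
 */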
6457
6458#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
6459 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
6460
6461#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
6462 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
6463
6464/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
6465DECL_INLINE_THROW(uint32_t)
6466iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6467 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
6468{
6469 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6470 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6471 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
6472 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
6473}
6474
6475
6476#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
6477 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
6478
6479#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
6480 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
6481
6482/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
6483DECL_INLINE_THROW(uint32_t)
6484iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6485 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
6486{
6487 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6488 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6489 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
6490 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
6491 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
6492}
6493
6494
6495
6496/*********************************************************************************************************************************
6497* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
6498*********************************************************************************************************************************/
6499
6500#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
6501 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx)
6502
6503/** Emits code for IEM_MC_FETCH_GREG_U8. */
6504DECL_INLINE_THROW(uint32_t)
6505iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx)
6506{
6507 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6508 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint8_t));
6509 Assert(iGRegEx < 20);
6510
6511 /* Same discussion as in iemNativeEmitFetchGregU16 */
6512 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
6513 kIemNativeGstRegUse_ReadOnly);
6514
6515 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6516 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6517
6518 if (iGRegEx < 16)
6519 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
6520 else
6521 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
6522
6523 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6524 return off;
6525}
6526
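/*
 * Example (sketch): IEM_MC_FETCH_GREG_U8_THREADED with a_iGRegEx = 16 fetches
 * AH.  The code above shadows guest RAX (16 & 15 == 0) read-only and uses
 * iemNativeEmitLoadGprFromGpr8Hi to copy the high byte (bits 15:8) of the
 * shadow register into the variable's host register:
 *
 *     uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
 *     off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
 */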
6527
6528#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
6529 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
6530
6531/** Emits code for IEM_MC_FETCH_GREG_U16. */
6532DECL_INLINE_THROW(uint32_t)
6533iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
6534{
6535 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6536 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
6537 Assert(iGReg < 16);
6538
6539 /*
6540 * We can either just load the low 16 bits of the GPR into a host register
6541 * for the variable, or we can do so via a shadow copy host register. The
6542 * latter will avoid having to reload it if it's being stored later, but
6543 * will waste a host register if it isn't touched again. Since we don't
6544 * know what's going to happen, we choose the latter for now.
6545 */
6546 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6547 kIemNativeGstRegUse_ReadOnly);
6548
6549 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6550 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6551 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
6552
6553 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6554 return off;
6555}
6556
6557
6558#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
6559 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg)
6560
6561/** Emits code for IEM_MC_FETCH_GREG_U32. */
6562DECL_INLINE_THROW(uint32_t)
6563iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
6564{
6565 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6566 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint32_t));
6567 Assert(iGReg < 16);
6568
6569 /*
6570 * We can either just load the low 32 bits of the GPR into a host register
6571 * for the variable, or we can do so via a shadow copy host register. The
6572 * latter will avoid having to reload it if it's being stored later, but
6573 * will waste a host register if it isn't touched again. Since we don't
6574 * know what's going to happen, we choose the latter for now.
6575 */
6576 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6577 kIemNativeGstRegUse_ReadOnly);
6578
6579 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6580 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6581 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
6582
6583 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6584 return off;
6585}
6586
6587
6588
6589/*********************************************************************************************************************************
6590* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
6591*********************************************************************************************************************************/
6592
6593#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
6594 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
6595
6596/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
6597DECL_INLINE_THROW(uint32_t)
6598iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
6599{
6600 Assert(iGRegEx < 20);
6601 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
6602 kIemNativeGstRegUse_ForUpdate);
6603#ifdef RT_ARCH_AMD64
6604 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6605
6606 /* To the lowest byte of the register: mov r8, imm8 */
6607 if (iGRegEx < 16)
6608 {
6609 if (idxGstTmpReg >= 8)
6610 pbCodeBuf[off++] = X86_OP_REX_B;
6611 else if (idxGstTmpReg >= 4)
6612 pbCodeBuf[off++] = X86_OP_REX;
6613 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
6614 pbCodeBuf[off++] = u8Value;
6615 }
6616 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
6617 else if (idxGstTmpReg < 4)
6618 {
6619 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
6620 pbCodeBuf[off++] = u8Value;
6621 }
6622 else
6623 {
6624 /* ror reg64, 8 */
6625 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
6626 pbCodeBuf[off++] = 0xc1;
6627 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6628 pbCodeBuf[off++] = 8;
6629
6630 /* mov reg8, imm8 */
6631 if (idxGstTmpReg >= 8)
6632 pbCodeBuf[off++] = X86_OP_REX_B;
6633 else if (idxGstTmpReg >= 4)
6634 pbCodeBuf[off++] = X86_OP_REX;
6635 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
6636 pbCodeBuf[off++] = u8Value;
6637
6638 /* rol reg64, 8 */
6639 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
6640 pbCodeBuf[off++] = 0xc1;
6641 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
6642 pbCodeBuf[off++] = 8;
6643 }
6644
6645#elif defined(RT_ARCH_ARM64)
6646 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
6647 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6648 if (iGRegEx < 16)
6649 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
6650 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
6651 else
6652 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
6653 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
6654 iemNativeRegFreeTmp(pReNative, idxImmReg);
6655
6656#else
6657# error "Port me!"
6658#endif
6659
6660 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6661
6662 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
6663
6664 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6665 return off;
6666}
6667
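/*
 * Worked AMD64 example (sketch): storing a_u8Value into a guest high byte
 * register (AH/CH/DH/BH, i.e. iGRegEx >= 16) while the guest GPR happens to be
 * shadowed by host register r10 (idxGstTmpReg = 10), i.e. the ror/mov/rol path:
 *
 *     49 C1 CA 08        ror r10, 8          ; bring bits 15:8 down to the low byte
 *     41 B2 xx           mov r10b, a_u8Value
 *     49 C1 C2 08        rol r10, 8          ; rotate back
 */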
6668
6669#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
6670 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
6671
6672/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
6673DECL_INLINE_THROW(uint32_t)
6674iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
6675{
6676 Assert(iGReg < 16);
6677 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6678 kIemNativeGstRegUse_ForUpdate);
6679#ifdef RT_ARCH_AMD64
6680 /* mov reg16, imm16 */
6681 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6682 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6683 if (idxGstTmpReg >= 8)
6684 pbCodeBuf[off++] = X86_OP_REX_B;
6685 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
6686 pbCodeBuf[off++] = RT_BYTE1(uValue);
6687 pbCodeBuf[off++] = RT_BYTE2(uValue);
6688
6689#elif defined(RT_ARCH_ARM64)
6690 /* movk xdst, #uValue, lsl #0 */
6691 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6692 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
6693
6694#else
6695# error "Port me!"
6696#endif
6697
6698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6699
6700 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6701 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6702 return off;
6703}
6704
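/*
 * Worked example (sketch): a_u16Const = 0x1234 with the guest GPR shadowed by
 * host register rcx (idxGstTmpReg = 1):
 *
 *     AMD64:  66 B9 34 12        mov cx, 0x1234
 *     ARM64:  movk x1, #0x1234, lsl #0
 */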
6705
6706#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
6707 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
6708
6709/** Emits code for IEM_MC_STORE_GREG_U16. */
6710DECL_INLINE_THROW(uint32_t)
6711iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6712{
6713 Assert(iGReg < 16);
6714 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6715
6716 /*
6717 * If it's a constant value (unlikely) we treat this as an
6718 * IEM_MC_STORE_GREG_U16_CONST statement.
6719 */
6720 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6721 { /* likely */ }
6722 else
6723 {
6724 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
6725 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6726 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
6727 }
6728
6729 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6730 kIemNativeGstRegUse_ForUpdate);
6731
6732#ifdef RT_ARCH_AMD64
6733 /* mov reg16, reg16 or [mem16] */
6734 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6735 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6736 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6737 {
6738 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
6739 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
6740 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
6741 pbCodeBuf[off++] = 0x8b;
6742 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
6743 }
6744 else
6745 {
6746 AssertStmt(pReNative->Core.aVars[idxValueVar].idxStackSlot != UINT8_MAX,
6747 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
6748 if (idxGstTmpReg >= 8)
6749 pbCodeBuf[off++] = X86_OP_REX_R;
6750 pbCodeBuf[off++] = 0x8b;
6751 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeVarCalcBpDisp(pReNative, idxValueVar), pReNative);
6752 }
6753
6754#elif defined(RT_ARCH_ARM64)
6755 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
6756 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off);
6757 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6758 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
6759
6760#else
6761# error "Port me!"
6762#endif
6763
6764 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6765
6766 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6767 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6768 return off;
6769}
6770
6771
6772#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
6773 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
6774
6775/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
6776DECL_INLINE_THROW(uint32_t)
6777iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
6778{
6779 Assert(iGReg < 16);
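     /* Note: the 32-bit constant is zero extended and written with a 64-bit store,
        matching the x86-64 rule that writing a 32-bit GPR clears bits 63:32. */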
6780 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6781 kIemNativeGstRegUse_ForFullWrite);
6782 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
6783 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6784 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6785 return off;
6786}
6787
6788
6789#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
6790 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
6791
6792/** Emits code for IEM_MC_STORE_GREG_U32. */
6793DECL_INLINE_THROW(uint32_t)
6794iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6795{
6796 Assert(iGReg < 16);
6797 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6798
6799 /*
6800 * If it's a constant value (unlikely) we treat this as an
6801 * IEM_MC_STORE_GREG_U32_CONST statement.
6802 */
6803 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6804 { /* likely */ }
6805 else
6806 {
6807 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
6808 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6809 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
6810 }
6811
6812 /*
6813 * For the rest we allocate a guest register for the variable and write
6814 * it to the CPUMCTX structure.
6815 */
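     /* Note: since the value is written with a 64-bit store, the strict-build check
        below asserts that the top 32 bits of the variable register are already clear. */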
6816 uint8_t const idxVarReg = iemNativeVarAllocRegisterForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
6817 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6818#ifdef VBOX_STRICT
6819 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
6820#endif
6821 return off;
6822}
6823
6824
6825
6826#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
6827 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
6828
6829/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
6830DECL_INLINE_THROW(uint32_t)
6831iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
6832{
6833 Assert(iGReg < 16);
6834 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6835 kIemNativeGstRegUse_ForUpdate);
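     /* A 32-bit register-to-itself move zero extends on both host architectures,
        clearing bits 63:32 before the value is written back to the context. */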
6836 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
6837 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6838 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6839 return off;
6840}
6841
6842
6843/*********************************************************************************************************************************
6844* General purpose register manipulation (add, sub). *
6845*********************************************************************************************************************************/
6846
6847#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
6848 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
6849
6850/** Emits code for IEM_MC_SUB_GREG_U16. */
6851DECL_INLINE_THROW(uint32_t)
6852iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
6853{
6854 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6855 kIemNativeGstRegUse_ForUpdate);
6856
6857#ifdef RT_ARCH_AMD64
6858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6860 if (idxGstTmpReg >= 8)
6861 pbCodeBuf[off++] = X86_OP_REX_B;
6862 if (uSubtrahend == 1)
6863 {
6864 pbCodeBuf[off++] = 0xff; /* dec r/m16 */
6865 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6866 }
6867 else
6868 {
6869 pbCodeBuf[off++] = 0x81; /* sub r/m16, imm16 */
6870 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6871 pbCodeBuf[off++] = uSubtrahend;
6872 pbCodeBuf[off++] = 0;
6873 }
6874
6875#else
6876 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6877 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6878
6879 /* sub tmp, gstgrp, uSubtrahend */
6880 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
6881
6882 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
6883 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
6884
6885 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6886#endif
6887
6888 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6889
6890 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6891
6892 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6893 return off;
6894}
6895
6896
6897#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
6898 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
6899
6900#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
6901 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
6902
6903/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
6904DECL_INLINE_THROW(uint32_t)
6905iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
6906{
6907 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6908 kIemNativeGstRegUse_ForUpdate);
6909
6910#ifdef RT_ARCH_AMD64
6911 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6912 if (f64Bit)
6913 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
6914 else if (idxGstTmpReg >= 8)
6915 pbCodeBuf[off++] = X86_OP_REX_B;
6916 if (uSubtrahend == 1)
6917 {
6918 /* dec */
6919 pbCodeBuf[off++] = 0xff;
6920 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6921 }
6922 else if (uSubtrahend < 128)
6923 {
6924 pbCodeBuf[off++] = 0x83; /* sub */
6925 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6926 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6927 }
6928 else
6929 {
6930 pbCodeBuf[off++] = 0x81; /* sub */
6931 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6932 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6933 pbCodeBuf[off++] = 0;
6934 pbCodeBuf[off++] = 0;
6935 pbCodeBuf[off++] = 0;
6936 }
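     /* Encoding example (assuming idxGstTmpReg maps to xCX and !f64Bit): uSubtrahend=1
        gives FF C9 (dec ecx), uSubtrahend=2 gives 83 E9 02 (sub ecx, 2), and
        uSubtrahend=200 gives 81 E9 C8 00 00 00 (sub ecx, 200). */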
6937
6938#else
6939 /* sub tmp, gstgrp, uSubtrahend */
6940 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6941 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
6942
6943#endif
6944
6945 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6946
6947 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6948
6949 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6950 return off;
6951}
6952
6953
6954
6955/*********************************************************************************************************************************
6956* Register references. *
6957*********************************************************************************************************************************/
6958
6959#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6960 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6961
6962#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6963 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6964
6965/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6966DECL_INLINE_THROW(uint32_t)
6967iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6968{
6969 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6970 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
6971 Assert(iGRegEx < 20);
6972
6973 if (iGRegEx < 16)
6974 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6975 else
6976 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6977
6978 /* If we've delayed writing back the register value, flush it now. */
6979 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6980
6981 /* If it's not a const reference we need to flush the shadow copy of the register now. */
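     /* (Any host register shadowing the guest register would go stale once the
        reference is used to modify it, hence the flush for writable references.) */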
6982 if (!fConst)
6983 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6984
6985 return off;
6986}
6987
6988#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6989 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6990
6991#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6992 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6993
6994#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6995 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6996
6997#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6998 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6999
7000#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
7001 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
7002
7003#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
7004 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
7005
7006#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
7007 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
7008
7009#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
7010 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
7011
7012#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
7013 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
7014
7015#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
7016 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
7017
7018/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
7019DECL_INLINE_THROW(uint32_t)
7020iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
7021{
7022 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
7023 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
7024 Assert(iGReg < 16);
7025
7026 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
7027
7028 /* If we've delayed writing back the register value, flush it now. */
7029 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
7030
7031 /* If it's not a const reference we need to flush the shadow copy of the register now. */
7032 if (!fConst)
7033 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
7034
7035 return off;
7036}
7037
7038
7039#define IEM_MC_REF_EFLAGS(a_pEFlags) \
7040 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
7041
7042/** Handles IEM_MC_REF_EFLAGS. */
7043DECL_INLINE_THROW(uint32_t)
7044iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
7045{
7046 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
7047 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
7048
7049 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
7050
7051 /* If we've delayed writing back the register value, flush it now. */
7052 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
7053
7054 /* If there is a shadow copy of guest EFLAGS, flush it now. */
7055 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
7056
7057 return off;
7058}
7059
7060
7061/*********************************************************************************************************************************
7062* Effective Address Calculation *
7063*********************************************************************************************************************************/
7064#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
7065 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
7066
7067/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
7068 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
7069DECL_INLINE_THROW(uint32_t)
7070iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7071 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
7072{
7073 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
7074
7075 /*
7076 * Handle the disp16 form with no registers first.
7077 *
7078 * Convert to an immediate value, as that'll delay the register allocation
7079 * and assignment till the memory access / call / whatever and we can use
7080 * a more appropriate register (or none at all).
7081 */
7082 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
7083 {
7084 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
7085 return off;
7086 }
7087
7088 /* Determine the displacement. */
7089 uint16_t u16EffAddr;
7090 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
7091 {
7092 case 0: u16EffAddr = 0; break;
7093 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
7094 case 2: u16EffAddr = u16Disp; break;
7095 default: AssertFailedStmt(u16EffAddr = 0);
7096 }
7097
7098 /* Determine the registers involved. */
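     /* The 16-bit ModR/M r/m field selects the base/index pair as follows:
        0=BX+SI, 1=BX+DI, 2=BP+SI, 3=BP+DI, 4=SI, 5=DI, 6=BP (pure disp16 when
        mod=0, handled above), 7=BX. */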
7099 uint8_t idxGstRegBase;
7100 uint8_t idxGstRegIndex;
7101 switch (bRm & X86_MODRM_RM_MASK)
7102 {
7103 case 0:
7104 idxGstRegBase = X86_GREG_xBX;
7105 idxGstRegIndex = X86_GREG_xSI;
7106 break;
7107 case 1:
7108 idxGstRegBase = X86_GREG_xBX;
7109 idxGstRegIndex = X86_GREG_xDI;
7110 break;
7111 case 2:
7112 idxGstRegBase = X86_GREG_xBP;
7113 idxGstRegIndex = X86_GREG_xSI;
7114 break;
7115 case 3:
7116 idxGstRegBase = X86_GREG_xBP;
7117 idxGstRegIndex = X86_GREG_xDI;
7118 break;
7119 case 4:
7120 idxGstRegBase = X86_GREG_xSI;
7121 idxGstRegIndex = UINT8_MAX;
7122 break;
7123 case 5:
7124 idxGstRegBase = X86_GREG_xDI;
7125 idxGstRegIndex = UINT8_MAX;
7126 break;
7127 case 6:
7128 idxGstRegBase = X86_GREG_xBP;
7129 idxGstRegIndex = UINT8_MAX;
7130 break;
7131#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
7132 default:
7133#endif
7134 case 7:
7135 idxGstRegBase = X86_GREG_xBX;
7136 idxGstRegIndex = UINT8_MAX;
7137 break;
7138 }
7139
7140 /*
7141 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
7142 */
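     /* On AMD64 this is done with MOVZX or LEA plus a 16-bit zero extension, on
        ARM64 with an UXTH preceded by ADD/MOVZ arithmetic when needed; see the
        per-host code below. */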
7143 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
7144 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
7145 kIemNativeGstRegUse_ReadOnly);
7146 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
7147 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
7148 kIemNativeGstRegUse_ReadOnly)
7149 : UINT8_MAX;
7150#ifdef RT_ARCH_AMD64
7151 if (idxRegIndex == UINT8_MAX)
7152 {
7153 if (u16EffAddr == 0)
7154 {
7155 /* movzx ret, base */
7156 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
7157 }
7158 else
7159 {
7160 /* lea ret32, [base64 + disp32] */
7161 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
7162 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7163 if (idxRegRet >= 8 || idxRegBase >= 8)
7164 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
7165 pbCodeBuf[off++] = 0x8d;
7166 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7167 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
7168 else
7169 {
7170 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
7171 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7172 }
7173 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
7174 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
7175 pbCodeBuf[off++] = 0;
7176 pbCodeBuf[off++] = 0;
7177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7178
7179 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
7180 }
7181 }
7182 else
7183 {
7184 /* lea ret32, [index64 + base64 (+ disp32)] */
7185 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7186 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7187 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7188 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7189 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7190 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
7191 pbCodeBuf[off++] = 0x8d;
7192 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
7193 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7194 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
7195 if (bMod == X86_MOD_MEM4)
7196 {
7197 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
7198 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
7199 pbCodeBuf[off++] = 0;
7200 pbCodeBuf[off++] = 0;
7201 }
7202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7203 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
7204 }
7205
7206#elif defined(RT_ARCH_ARM64)
7207 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7208 if (u16EffAddr == 0)
7209 {
7210 if (idxRegIndex == UINT8_MAX)
7211 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
7212 else
7213 {
7214 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
7215 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
7216 }
7217 }
7218 else
7219 {
7220 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
7221 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
7222 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
7223 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
7224 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
7225 else
7226 {
7227 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
7228 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
7229 }
7230 if (idxRegIndex != UINT8_MAX)
7231 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
7232 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
7233 }
7234
7235#else
7236# error "port me"
7237#endif
7238
7239 if (idxRegIndex != UINT8_MAX)
7240 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7241 iemNativeRegFreeTmp(pReNative, idxRegBase);
7242 return off;
7243}
7244
7245
7246#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
7247 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
7248
7249/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
7250 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
7251DECL_INLINE_THROW(uint32_t)
7252iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7253 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
7254{
7255 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
7256
7257 /*
7258 * Handle the disp32 form with no registers first.
7259 *
7260 * Convert to an immediate value, as that'll delay the register allocation
7261 * and assignment till the memory access / call / whatever and we can use
7262 * a more appropriate register (or none at all).
7263 */
7264 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
7265 {
7266 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
7267 return off;
7268 }
7269
7270 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
7271 uint32_t u32EffAddr = 0;
7272 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
7273 {
7274 case 0: break;
7275 case 1: u32EffAddr = (int8_t)u32Disp; break;
7276 case 2: u32EffAddr = u32Disp; break;
7277 default: AssertFailed();
7278 }
7279
7280 /* Get the register (or SIB) value. */
7281 uint8_t idxGstRegBase = UINT8_MAX;
7282 uint8_t idxGstRegIndex = UINT8_MAX;
7283 uint8_t cShiftIndex = 0;
7284 switch (bRm & X86_MODRM_RM_MASK)
7285 {
7286 case 0: idxGstRegBase = X86_GREG_xAX; break;
7287 case 1: idxGstRegBase = X86_GREG_xCX; break;
7288 case 2: idxGstRegBase = X86_GREG_xDX; break;
7289 case 3: idxGstRegBase = X86_GREG_xBX; break;
7290 case 4: /* SIB */
7291 {
7292 /* index w/ scaling. */
7293 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
7294 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
7295 {
7296 case 0: idxGstRegIndex = X86_GREG_xAX; break;
7297 case 1: idxGstRegIndex = X86_GREG_xCX; break;
7298 case 2: idxGstRegIndex = X86_GREG_xDX; break;
7299 case 3: idxGstRegIndex = X86_GREG_xBX; break;
7300 case 4: cShiftIndex = 0; /*no index*/ break;
7301 case 5: idxGstRegIndex = X86_GREG_xBP; break;
7302 case 6: idxGstRegIndex = X86_GREG_xSI; break;
7303 case 7: idxGstRegIndex = X86_GREG_xDI; break;
7304 }
7305
7306 /* base */
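     /* Note: bits 7:0 of uSibAndRspOffset hold the SIB byte itself, while the bits
        above hold an extra displacement that is only applied when the base is xSP
        (case 4 below). */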
7307 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
7308 {
7309 case 0: idxGstRegBase = X86_GREG_xAX; break;
7310 case 1: idxGstRegBase = X86_GREG_xCX; break;
7311 case 2: idxGstRegBase = X86_GREG_xDX; break;
7312 case 3: idxGstRegBase = X86_GREG_xBX; break;
7313 case 4:
7314 idxGstRegBase = X86_GREG_xSP;
7315 u32EffAddr += uSibAndRspOffset >> 8;
7316 break;
7317 case 5:
7318 if ((bRm & X86_MODRM_MOD_MASK) != 0)
7319 idxGstRegBase = X86_GREG_xBP;
7320 else
7321 {
7322 Assert(u32EffAddr == 0);
7323 u32EffAddr = u32Disp;
7324 }
7325 break;
7326 case 6: idxGstRegBase = X86_GREG_xSI; break;
7327 case 7: idxGstRegBase = X86_GREG_xDI; break;
7328 }
7329 break;
7330 }
7331 case 5: idxGstRegBase = X86_GREG_xBP; break;
7332 case 6: idxGstRegBase = X86_GREG_xSI; break;
7333 case 7: idxGstRegBase = X86_GREG_xDI; break;
7334 }
7335
7336 /*
7337 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
7338 * the start of the function.
7339 */
7340 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
7341 {
7342 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
7343 return off;
7344 }
7345
7346 /*
7347 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
7348 */
7349 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
7350 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
7351 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
7352 kIemNativeGstRegUse_ReadOnly);
7353 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
7354 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
7355 kIemNativeGstRegUse_ReadOnly);
7356
7357 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
7358 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
7359 {
7360 idxRegBase = idxRegIndex;
7361 idxRegIndex = UINT8_MAX;
7362 }
7363
7364#ifdef RT_ARCH_AMD64
7365 if (idxRegIndex == UINT8_MAX)
7366 {
7367 if (u32EffAddr == 0)
7368 {
7369 /* mov ret, base */
7370 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
7371 }
7372 else
7373 {
7374 /* lea ret32, [base64 + disp32] */
7375 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
7376 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7377 if (idxRegRet >= 8 || idxRegBase >= 8)
7378 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
7379 pbCodeBuf[off++] = 0x8d;
7380 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7381 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7382 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7383 else
7384 {
7385 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7386 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7387 }
7388 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7389 if (bMod == X86_MOD_MEM4)
7390 {
7391 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7392 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7393 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7394 }
7395 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7396 }
7397 }
7398 else
7399 {
7400 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7401 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7402 if (idxRegBase == UINT8_MAX)
7403 {
7404 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
7405 if (idxRegRet >= 8 || idxRegIndex >= 8)
7406 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7407 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
7408 pbCodeBuf[off++] = 0x8d;
7409 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7410 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7411 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7412 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7413 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7414 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7415 }
7416 else
7417 {
7418 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7419 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7420 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7421 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7422 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
7423 pbCodeBuf[off++] = 0x8d;
7424 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7425 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7426 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7427 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7428 if (bMod != X86_MOD_MEM0)
7429 {
7430 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7431 if (bMod == X86_MOD_MEM4)
7432 {
7433 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7434 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7435 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7436 }
7437 }
7438 }
7439 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7440 }
7441
7442#elif defined(RT_ARCH_ARM64)
7443 if (u32EffAddr == 0)
7444 {
7445 if (idxRegIndex == UINT8_MAX)
7446 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
7447 else if (idxRegBase == UINT8_MAX)
7448 {
7449 if (cShiftIndex == 0)
7450 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
7451 else
7452 {
7453 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7454 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
7455 }
7456 }
7457 else
7458 {
7459 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7460 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7461 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
7462 }
7463 }
7464 else
7465 {
7466 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
7467 {
7468 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7469 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
7470 }
7471 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
7472 {
7473 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7474 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
7475 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
7476 }
7477 else
7478 {
7479 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
7480 /* The immediate was already loaded into idxRegRet above; just add the base register if we have one. */
7481 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7482 if (idxRegBase != UINT8_MAX)
7483 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
7484 }
7485 if (idxRegIndex != UINT8_MAX)
7486 {
7487 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7488 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7489 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
7490 }
7491 }
7492
7493#else
7494# error "port me"
7495#endif
7496
7497 if (idxRegIndex != UINT8_MAX)
7498 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7499 if (idxRegBase != UINT8_MAX)
7500 iemNativeRegFreeTmp(pReNative, idxRegBase);
7501 return off;
7502}
7503
7504
7505#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7506 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff)
7507
7508#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7509 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 64)
7510
7511#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7512 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 32)
7513
7514
7515
7516/*********************************************************************************************************************************
7517* Memory fetches (IEM_MEM_FETCH_XXX). *
7518*********************************************************************************************************************************/
7519
7520
7521
7522
7523#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7524 off = iemNativeEmitMemFetchDataCommon(pReNative, off, pCallEntry->idxInstr, a_u8Dst, a_iSeg, a_GCPtrMem, sizeof(uint8_t))
7525
7526#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7527 off = iemNativeEmitMemFetchDataCommon(pReNative, off, pCallEntry->idxInstr, a_u16Dst, a_iSeg, a_GCPtrMem, sizeof(uint16_t))
7528
7529#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7530 off = iemNativeEmitMemFetchDataCommon(pReNative, off, pCallEntry->idxInstr, a_u32Dst, a_iSeg, a_GCPtrMem, sizeof(uint32_t))
7531
7532#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7533 off = iemNativeEmitMemFetchDataCommon(pReNative, off, pCallEntry->idxInstr, a_u64Dst, a_iSeg, a_GCPtrMem, sizeof(uint64_t))
7534
7535/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64. */
7536DECL_INLINE_THROW(uint32_t)
7537iemNativeEmitMemFetchDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
7538 uint8_t idxVarDst, uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem)
7539{
7540 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
7541 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7542 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
7543 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
7544 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7545 Assert(iSegReg < 6);
7546 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7547 RT_NOREF(idxInstr);
7548
7549#ifdef VBOX_STRICT
7550 /*
7551 * Check that the fExec flags we've got make sense.
7552 */
7553 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7554#endif
7555
7556 /*
7557 * To keep things simple we have to commit any pending writes first as we
7558 * may end up making calls.
7559 */
7560 /** @todo we could postpone this till we make the call and reload the
7561 * registers after returning from the call. Not sure if that's sensible or
7562 * not, though. */
7563 off = iemNativeRegFlushPendingWrites(pReNative, off);
7564
7565 /*
7566 * Move/spill/flush stuff out of call-volatile registers.
7567 * This is the easy way out. We could contain this to the tlb-miss branch
7568 * by saving and restoring active stuff here.
7569 */
7570 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7571 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7572
7573 /*
7574 * Define labels and allocate the result register (trying for the return
7575 * register if we can).
7576 */
7577 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7578 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7579 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7580 uint8_t const idxRegDst = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7581 ? iemNativeVarSetRegister(pReNative, idxVarDst, IEMNATIVE_CALL_RET_GREG, off)
7582 : iemNativeVarAllocRegister(pReNative, idxVarDst, &off);
7583
7584 /*
7585 * First we try to go via the TLB.
7586 */
7587//pReNative->pInstrBuf[off++] = 0xcc;
7588 /** @todo later. */
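     /* The TLB lookup is not implemented yet, so we always fall straight through to
        the TlbMiss code below and make the helper call. */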
7589
7590 /*
7591 * Call helper to do the fetching.
7592 * We flush all guest register shadow copies here.
7593 */
7594 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7595
7596 uint8_t idxRegArgGCPtrMem;
7597 uint8_t idxRegArgInstrIdx;
7598 uintptr_t pfnFunction;
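     /* Pick the flat helpers (which need not apply a segment base) when executing in
        64-bit mode or a flat 32-bit mode and the segment is DS/ES/SS (or CS in 64-bit
        mode); otherwise load the segment register index into ARG1 and use the generic
        helpers. */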
7599 if ( ( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7600 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7601 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT)
7602 && ( iSegReg == X86_SREG_DS
7603 || iSegReg == X86_SREG_ES
7604 || iSegReg == X86_SREG_SS
7605 || (iSegReg == X86_SREG_CS && (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT) ))
7606 {
7607 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7608 switch (cbMem)
7609 {
7610 case 1: pfnFunction = (uintptr_t)iemNativeHlpMemFlatFetchDataU8; break;
7611 case 2: pfnFunction = (uintptr_t)iemNativeHlpMemFlatFetchDataU16; break;
7612 case 4: pfnFunction = (uintptr_t)iemNativeHlpMemFlatFetchDataU32; break;
7613 case 8: pfnFunction = (uintptr_t)iemNativeHlpMemFlatFetchDataU64; break;
7614 default:
7615 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_MEM_SIZE));
7616 }
7617 idxRegArgInstrIdx = IEMNATIVE_CALL_ARG2_GREG;
7618 idxRegArgGCPtrMem = IEMNATIVE_CALL_ARG1_GREG;
7619 }
7620 else
7621 {
7622 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 3);
7623 switch (cbMem)
7624 {
7625 case 1: pfnFunction = (uintptr_t)iemNativeHlpMemFetchDataU8; break;
7626 case 2: pfnFunction = (uintptr_t)iemNativeHlpMemFetchDataU16; break;
7627 case 4: pfnFunction = (uintptr_t)iemNativeHlpMemFetchDataU32; break;
7628 case 8: pfnFunction = (uintptr_t)iemNativeHlpMemFetchDataU64; break;
7629 default:
7630 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_MEM_SIZE));
7631 }
7632 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iSegReg);
7633 idxRegArgInstrIdx = IEMNATIVE_CALL_ARG3_GREG;
7634 idxRegArgGCPtrMem = IEMNATIVE_CALL_ARG2_GREG;
7635 }
7636
7637 off = iemNativeEmitLoadGpr8Imm(pReNative, off, idxRegArgInstrIdx, idxInstr);
7638
7639 if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
7640 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgGCPtrMem, pReNative->Core.aVars[idxVarGCPtrMem].u.uValue);
7641 else
7642 {
7643 uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
7644 if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
7645 {
7646 Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
7647 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgGCPtrMem, idxRegVarGCPtrMem);
7648 }
7649 else
7650 {
7651 AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
7652 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgGCPtrMem, iemNativeVarCalcBpDisp(pReNative, idxVarGCPtrMem));
7653 }
7654 }
7655 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7656 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7657
7658 /* Put the result in the right register. */
7659 Assert(idxRegDst == pReNative->Core.aVars[idxVarDst].idxReg);
7660 if (idxRegDst != IEMNATIVE_CALL_RET_GREG)
7661 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegDst, IEMNATIVE_CALL_RET_GREG);
7662
7663 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7664
7665 return off;
7666}
7667
7668
7669
7670/*********************************************************************************************************************************
7671* Builtin functions *
7672*********************************************************************************************************************************/
7673
7674/**
7675 * Built-in function that calls a C-implementation function taking zero arguments.
7676 */
7677static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
7678{
7679 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
7680 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
7681 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
7682 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
7683}
7684
7685
7686/**
7687 * Built-in function that checks for pending interrupts that can be delivered or
7688 * forced action flags.
7689 *
7690 * This triggers after the completion of an instruction, so EIP is already at
7691 * the next instruction. If an IRQ or important FF is pending, this will return
7692 * a non-zero status that stops TB execution.
7693 */
7694static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
7695{
7696 RT_NOREF(pCallEntry);
7697
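     /* Overview: 1) check the EMT's local forced action flags; FFs other than
        APIC/PIC interrupts cause an immediate return-break. 2) For pending APIC/PIC
        interrupts, return-break unless EFLAGS.IF is clear or a still valid interrupt
        shadow (uRipInhibitInt == PC) suppresses delivery. 3) Finally check the
        VM-wide forced action flags and return-break if any are set. */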
7698 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
7699 and I'm too lazy to create a 'Fixed' version of that one. */
7700 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
7701 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
7702
7703 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
7704
7705 /* Again, we need to load the extended EFLAGS before we actually need them
7706 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
7707 loaded them inside the check, as the shadow state would not be correct
7708 when the code branches before the load. Ditto PC. */
7709 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7710 kIemNativeGstRegUse_ReadOnly);
7711
7712 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
7713
7714 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7715
7716 /*
7717 * Start by checking the local forced actions of the EMT we're on for IRQs
7718 * and other FFs that needs servicing.
7719 */
7720 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
7721 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
7722 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
7723 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
7724 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
7725 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
7726 | VMCPU_FF_TLB_FLUSH
7727 | VMCPU_FF_UNHALT ),
7728 true /*fSetFlags*/);
7729 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
7730 uint32_t const offFixupJumpToVmCheck1 = off;
7731 off = iemNativeEmitJzToFixed(pReNative, off, 0);
7732
7733 /* Some relevant FFs are set, but if it's only the APIC and/or PIC being set,
7734 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
7735 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
7736 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
7737 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
7738 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
7739
7740 /* So, it's only interrupt related FFs and we need to see if IRQs are being
7741 suppressed by the CPU or not. */
7742 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
7743 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
7744 idxLabelReturnBreak);
7745
7746 /* We've got shadow flags set, so we must check that the PC they are valid
7747 for matches our current PC value. */
7748 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
7749 * a register. */
7750 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
7751 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
7752
7753 /*
7754 * Now check the force flags of the VM.
7755 */
7756 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
7757 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
7758 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
7759 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
7760 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
7761 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
7762
7763 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
7764
7765 /*
7766 * We're good, no IRQs or FFs pending.
7767 */
7768 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7769 iemNativeRegFreeTmp(pReNative, idxEflReg);
7770 iemNativeRegFreeTmp(pReNative, idxPcReg);
7771
7772 return off;
7773}
7774
7775
7776/**
7777 * Built-in function that checks if IEMCPU::fExec has the expected value.
7778 */
7779static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
7780{
7781 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
7782 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7783
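     /* Only the bits covered by IEMTB_F_KEY_MASK take part in the comparison; the
        other fExec bits may differ without invalidating the TB. */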
7784 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
7785 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
7786 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
7787 kIemNativeLabelType_ReturnBreak);
7788 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7789 return off;
7790}
7791
7792
7793
7794/*********************************************************************************************************************************
7795* The native code generator functions for each MC block. *
7796*********************************************************************************************************************************/
7797
7798
7799/*
7800 * Include g_apfnIemNativeRecompileFunctions and associated functions.
7801 *
7802 * This should probably live in its own file later, but let's see what the
7803 * compile times turn out to be first.
7804 */
7805#include "IEMNativeFunctions.cpp.h"
7806
7807
7808
7809/*********************************************************************************************************************************
7810* Recompiler Core. *
7811*********************************************************************************************************************************/
7812
7813
7814/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
7815static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
7816{
7817 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
7818 pDis->cbCachedInstr += cbMaxRead;
7819 RT_NOREF(cbMinRead);
7820 return VERR_NO_DATA;
7821}
7822
7823
7824/**
7825 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
7826 * @returns pszBuf.
7827 * @param fFlags The flags.
7828 * @param pszBuf The output buffer.
7829 * @param cbBuf The output buffer size. At least 32 bytes.
7830 */
7831DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
7832{
7833 Assert(cbBuf >= 32);
7834 static RTSTRTUPLE const s_aModes[] =
7835 {
7836 /* [00] = */ { RT_STR_TUPLE("16BIT") },
7837 /* [01] = */ { RT_STR_TUPLE("32BIT") },
7838 /* [02] = */ { RT_STR_TUPLE("!2!") },
7839 /* [03] = */ { RT_STR_TUPLE("!3!") },
7840 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
7841 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
7842 /* [06] = */ { RT_STR_TUPLE("!6!") },
7843 /* [07] = */ { RT_STR_TUPLE("!7!") },
7844 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
7845 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
7846 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
7847 /* [0b] = */ { RT_STR_TUPLE("!b!") },
7848 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
7849 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
7850 /* [0e] = */ { RT_STR_TUPLE("!e!") },
7851 /* [0f] = */ { RT_STR_TUPLE("!f!") },
7852 /* [10] = */ { RT_STR_TUPLE("!10!") },
7853 /* [11] = */ { RT_STR_TUPLE("!11!") },
7854 /* [12] = */ { RT_STR_TUPLE("!12!") },
7855 /* [13] = */ { RT_STR_TUPLE("!13!") },
7856 /* [14] = */ { RT_STR_TUPLE("!14!") },
7857 /* [15] = */ { RT_STR_TUPLE("!15!") },
7858 /* [16] = */ { RT_STR_TUPLE("!16!") },
7859 /* [17] = */ { RT_STR_TUPLE("!17!") },
7860 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
7861 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
7862 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
7863 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
7864 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
7865 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
7866 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
7867 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
7868 };
7869 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
7870 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
7871 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
7872
7873 pszBuf[off++] = ' ';
7874 pszBuf[off++] = 'C';
7875 pszBuf[off++] = 'P';
7876 pszBuf[off++] = 'L';
7877 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
7878 Assert(off < 32);
7879
7880 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
7881
7882 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
7883 {
7884 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
7885 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
7886 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
7887 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
7888 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
7889 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
7890 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
7891 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
7892 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
7893 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
7894 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
7895 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
7896 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
7897 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
7898 };
7899 if (fFlags)
7900 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
7901 if (s_aFlags[i].fFlag & fFlags)
7902 {
7903 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
7904 pszBuf[off++] = ' ';
7905 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
7906 off += s_aFlags[i].cchName;
7907 fFlags &= ~s_aFlags[i].fFlag;
7908 if (!fFlags)
7909 break;
7910 }
7911 pszBuf[off] = '\0';
7912
7913 return pszBuf;
7914}
7915
7916
7917DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
7918{
7919 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
7920
7921 char szDisBuf[512];
7922 DISSTATE Dis;
7923 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
7924 uint32_t const cNative = pTb->Native.cInstructions;
7925 uint32_t offNative = 0;
7926#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7927 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
7928#endif
7929 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
7930 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
7931 : DISCPUMODE_64BIT;
7932#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7933 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
7934#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7935 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
7936#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7937# error "Port me"
7938#else
7939 csh hDisasm = ~(size_t)0;
7940# if defined(RT_ARCH_AMD64)
7941 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
7942# elif defined(RT_ARCH_ARM64)
7943 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
7944# else
7945# error "Port me"
7946# endif
7947 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
7948#endif
7949
7950 /*
7951 * Print TB info.
7952 */
7953 pHlp->pfnPrintf(pHlp,
7954 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
7955 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
7956 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
7957 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
7958#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7959 if (pDbgInfo && pDbgInfo->cEntries > 1)
7960 {
7961 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
7962
7963 /*
7964 * This disassembly is driven by the debug info which follows the native
7965 * code and indicates where the next guest instruction starts, where the
7966 * labels are, and other such things.
7967 */
7968 uint32_t idxThreadedCall = 0;
7969 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
7970 uint8_t idxRange = UINT8_MAX;
7971 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
7972 uint32_t offRange = 0;
7973 uint32_t offOpcodes = 0;
7974 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
7975 uint32_t const cDbgEntries = pDbgInfo->cEntries;
7976 uint32_t iDbgEntry = 1;
7977 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
7978
7979 while (offNative < cNative)
7980 {
7981 /* If we're at or have passed the point where the next chunk of debug
7982 info starts, process it. */
7983 if (offDbgNativeNext <= offNative)
7984 {
7985 offDbgNativeNext = UINT32_MAX;
7986 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
7987 {
7988 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
7989 {
7990 case kIemTbDbgEntryType_GuestInstruction:
7991 {
7992 /* Did the exec flag change? */
7993 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
7994 {
7995 pHlp->pfnPrintf(pHlp,
7996 " fExec change %#08x -> %#08x %s\n",
7997 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
7998 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
7999 szDisBuf, sizeof(szDisBuf)));
8000 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8001 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8002 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8003 : DISCPUMODE_64BIT;
8004 }
8005
8006 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8007 where the compilation was aborted before the opcode was recorded and the actual
8008 instruction was translated to a threaded call. This may happen when we run out
8009 of ranges, or when some complicated interrupts/FFs are found to be pending or
8010 similar. So, we just deal with it here rather than in the compiler code as it
8011 is a lot simpler to do up here. */
8012 if ( idxRange == UINT8_MAX
8013 || idxRange >= cRanges
8014 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8015 {
8016 idxRange += 1;
8017 if (idxRange < cRanges)
8018 offRange = 0;
8019 else
8020 continue;
8021 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
8022 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8023 + (pTb->aRanges[idxRange].idxPhysPage == 0
8024 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8025 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8026 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8027 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8028 pTb->aRanges[idxRange].idxPhysPage);
8029 }
8030
8031 /* Disassemble the instruction. */
8032 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8033 uint32_t cbInstr = 1;
8034 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8035 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8036 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8037 if (RT_SUCCESS(rc))
8038 {
8039 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8040 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8041 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8042 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8043
8044 static unsigned const s_offMarker = 55;
8045 static char const s_szMarker[] = " ; <--- guest";
8046 if (cch < s_offMarker)
8047 {
8048 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8049 cch = s_offMarker;
8050 }
8051 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8052 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8053
8054 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8055 }
8056 else
8057 {
8058 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8059 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8060 cbInstr = 1;
8061 }
8062 GCPhysPc += cbInstr;
8063 offOpcodes += cbInstr;
8064 offRange += cbInstr;
8065 continue;
8066 }
8067
8068 case kIemTbDbgEntryType_ThreadedCall:
8069 pHlp->pfnPrintf(pHlp,
8070 " Call #%u to %s (%u args)%s\n",
8071 idxThreadedCall,
8072 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8073 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8074 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
8075 idxThreadedCall++;
8076 continue;
8077
8078 case kIemTbDbgEntryType_GuestRegShadowing:
8079 {
8080 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8081 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8082 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8083 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8084 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8085 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8086 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8087 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8088 else
8089 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8090 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8091 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8092 continue;
8093 }
8094
8095 case kIemTbDbgEntryType_Label:
8096 {
8097 const char *pszName = "what_the_fudge";
8098 const char *pszComment = "";
8099 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8100 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8101 {
8102 case kIemNativeLabelType_Return:
8103 pszName = "Return";
8104 break;
8105 case kIemNativeLabelType_ReturnBreak:
8106 pszName = "ReturnBreak";
8107 break;
8108 case kIemNativeLabelType_ReturnWithFlags:
8109 pszName = "ReturnWithFlags";
8110 break;
8111 case kIemNativeLabelType_NonZeroRetOrPassUp:
8112 pszName = "NonZeroRetOrPassUp";
8113 break;
8114 case kIemNativeLabelType_RaiseGp0:
8115 pszName = "RaiseGp0";
8116 break;
8117 case kIemNativeLabelType_If:
8118 pszName = "If";
8119 fNumbered = true;
8120 break;
8121 case kIemNativeLabelType_Else:
8122 pszName = "Else";
8123 fNumbered = true;
8124 pszComment = " ; regs state restored pre-if-block";
8125 break;
8126 case kIemNativeLabelType_Endif:
8127 pszName = "Endif";
8128 fNumbered = true;
8129 break;
8130 case kIemNativeLabelType_CheckIrq:
8131 pszName = "CheckIrq_CheckVM";
8132 fNumbered = true;
8133 break;
8134 case kIemNativeLabelType_TlbMiss:
8135                                 pszName = "TlbMiss";
8136 fNumbered = true;
8137 break;
8138 case kIemNativeLabelType_TlbDone:
8139                                 pszName = "TlbDone";
8140 fNumbered = true;
8141 break;
8142 case kIemNativeLabelType_Invalid:
8143 case kIemNativeLabelType_End:
8144 break;
8145 }
8146 if (fNumbered)
8147 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8148 else
8149 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8150 continue;
8151 }
8152
8153 case kIemTbDbgEntryType_NativeOffset:
8154 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8155 Assert(offDbgNativeNext > offNative);
8156 break;
8157
8158 default:
8159 AssertFailed();
8160 }
8161 iDbgEntry++;
8162 break;
8163 }
8164 }
8165
8166 /*
8167 * Disassemble the next native instruction.
8168 */
8169 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8170# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8171 uint32_t cbInstr = sizeof(paNative[0]);
8172 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8173 if (RT_SUCCESS(rc))
8174 {
8175# if defined(RT_ARCH_AMD64)
8176 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8177 {
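                        /* The marker NOP's 32-bit payload packs the call number in bits 0..14,
                           the recompiled flag in bit 15 and the threaded function index in the
                           high word (see the RT_MAKE_U32 in iemNativeRecompile below). */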
8178 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8179 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8180 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
8181 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8182 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8183 uInfo & 0x8000 ? " - recompiled" : "");
8184 else
8185 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8186 }
8187 else
8188# endif
8189 {
8190# ifdef RT_ARCH_AMD64
8191 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8192 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8193 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8194 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8195# elif defined(RT_ARCH_ARM64)
8196 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8197 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8198 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8199# else
8200# error "Port me"
8201# endif
8202 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8203 }
8204 }
8205 else
8206 {
8207# if defined(RT_ARCH_AMD64)
8208 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8209 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8210# elif defined(RT_ARCH_ARM64)
8211 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8212# else
8213# error "Port me"
8214# endif
8215 cbInstr = sizeof(paNative[0]);
8216 }
8217 offNative += cbInstr / sizeof(paNative[0]);
8218
8219# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
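            /* Capstone path: decode a single instruction at pNativeCur.  pInstr->size is in
               bytes, so it is converted back to native instruction units when advancing. */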
8220 cs_insn *pInstr;
8221 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8222 (uintptr_t)pNativeCur, 1, &pInstr);
8223 if (cInstrs > 0)
8224 {
8225 Assert(cInstrs == 1);
8226# if defined(RT_ARCH_AMD64)
8227 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8228 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8229# else
8230 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8231 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8232# endif
8233 offNative += pInstr->size / sizeof(*pNativeCur);
8234 cs_free(pInstr, cInstrs);
8235 }
8236 else
8237 {
8238# if defined(RT_ARCH_AMD64)
8239 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8240                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8241# else
8242 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8243# endif
8244 offNative++;
8245 }
8246# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8247 }
8248 }
8249 else
8250#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8251 {
8252 /*
8253 * No debug info, just disassemble the x86 code and then the native code.
8254 *
8255 * First the guest code:
8256 */
8257 for (unsigned i = 0; i < pTb->cRanges; i++)
8258 {
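                /* idxPhysPage 0 refers to the TB's own page; non-zero indexes are 1-based
                   offsets into pTb->aGCPhysPages. */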
8259 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8260 + (pTb->aRanges[i].idxPhysPage == 0
8261 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8262 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8263 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8264 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8265 unsigned off = pTb->aRanges[i].offOpcodes;
8266 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8267 while (off < cbOpcodes)
8268 {
8269 uint32_t cbInstr = 1;
8270 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8271 &pTb->pabOpcodes[off], cbOpcodes - off,
8272 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8273 if (RT_SUCCESS(rc))
8274 {
8275 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8276 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8277 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8278 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8279 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8280 GCPhysPc += cbInstr;
8281 off += cbInstr;
8282 }
8283 else
8284 {
8285 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8286 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8287 break;
8288 }
8289 }
8290 }
8291
8292 /*
8293 * Then the native code:
8294 */
8295 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8296 while (offNative < cNative)
8297 {
8298 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8299# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8300 uint32_t cbInstr = sizeof(paNative[0]);
8301 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8302 if (RT_SUCCESS(rc))
8303 {
8304# if defined(RT_ARCH_AMD64)
8305 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8306 {
8307 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8308 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8309 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
8310 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8311 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8312 uInfo & 0x8000 ? " - recompiled" : "");
8313 else
8314 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8315 }
8316 else
8317# endif
8318 {
8319# ifdef RT_ARCH_AMD64
8320 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8321 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8322 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8323 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8324# elif defined(RT_ARCH_ARM64)
8325 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8326 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8327 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8328# else
8329# error "Port me"
8330# endif
8331 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8332 }
8333 }
8334 else
8335 {
8336# if defined(RT_ARCH_AMD64)
8337 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8338 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8339# else
8340 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8341# endif
8342 cbInstr = sizeof(paNative[0]);
8343 }
8344 offNative += cbInstr / sizeof(paNative[0]);
8345
8346# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8347 cs_insn *pInstr;
8348 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8349 (uintptr_t)pNativeCur, 1, &pInstr);
8350 if (cInstrs > 0)
8351 {
8352 Assert(cInstrs == 1);
8353# if defined(RT_ARCH_AMD64)
8354 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8355 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8356# else
8357 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8358 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8359# endif
8360 offNative += pInstr->size / sizeof(*pNativeCur);
8361 cs_free(pInstr, cInstrs);
8362 }
8363 else
8364 {
8365# if defined(RT_ARCH_AMD64)
8366 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8367                                     pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8368# else
8369 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8370# endif
8371 offNative++;
8372 }
8373# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8374 }
8375 }
8376
8377#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8378 /* Cleanup. */
8379 cs_close(&hDisasm);
8380#endif
8381}
8382
8383
8384/**
8385 * Recompiles the given threaded TB into a native one.
8386 *
8387 * In case of failure the translation block will be returned as-is.
8388 *
8389 * @returns pTb.
8390 * @param pVCpu The cross context virtual CPU structure of the calling
8391 * thread.
8392 * @param   pTb     The threaded translation block to recompile to native.
8393 */
8394DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
8395{
8396 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
8397
8398 /*
8399      * The first time through we allocate the recompiler state; on subsequent
8400      * calls we just reset it before using it again.
8401 */
8402 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
8403 if (RT_LIKELY(pReNative))
8404 iemNativeReInit(pReNative, pTb);
8405 else
8406 {
8407 pReNative = iemNativeInit(pVCpu, pTb);
8408 AssertReturn(pReNative, pTb);
8409 }
8410
8411 /*
8412      * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp,
8413      * so we can abort cleanly if an error occurs.
8414 */
8415 uint32_t cCallsLeft = pTb->Thrd.cCalls;
8416#ifdef LOG_ENABLED
8417 uint32_t const cCallsOrg = cCallsLeft;
8418#endif
8419 uint32_t off = 0;
8420 int rc = VINF_SUCCESS;
8421 IEMNATIVE_TRY_SETJMP(pReNative, rc)
8422 {
8423 /*
8424 * Emit prolog code (fixed).
8425 */
8426 off = iemNativeEmitProlog(pReNative, off);
8427
8428 /*
8429 * Convert the calls to native code.
8430 */
8431#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8432 int32_t iGstInstr = -1;
8433#endif
8434#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
8435 uint32_t cThreadedCalls = 0;
8436 uint32_t cRecompiledCalls = 0;
8437#endif
8438 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
8439 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
8440 while (cCallsLeft-- > 0)
8441 {
8442 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
8443
8444 /*
8445 * Debug info and assembly markup.
8446 */
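            /* The CheckMode built-in carries the updated execution mode flags in its first
               parameter; mirror them into fExec so the rest of the TB is recompiled for the
               right mode. */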
8447 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
8448 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
8449#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8450 iemNativeDbgInfoAddNativeOffset(pReNative, off);
8451 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
8452 {
8453 if (iGstInstr < (int32_t)pTb->cInstructions)
8454 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
8455 else
8456 Assert(iGstInstr == pTb->cInstructions);
8457 iGstInstr = pCallEntry->idxInstr;
8458 }
8459 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
8460#endif
8461#if defined(VBOX_STRICT)
8462 off = iemNativeEmitMarker(pReNative, off,
8463 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
8464 pCallEntry->enmFunction));
8465#endif
8466#if defined(VBOX_STRICT)
8467 iemNativeRegAssertSanity(pReNative);
8468#endif
8469
8470 /*
8471 * Actual work.
8472 */
8473 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
8474 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "" : "(todo)"));
8475 if (pfnRecom) /** @todo stats on this. */
8476 {
8477 off = pfnRecom(pReNative, off, pCallEntry);
8478 STAM_REL_STATS({cRecompiledCalls++;});
8479 }
8480 else
8481 {
8482 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
8483 STAM_REL_STATS({cThreadedCalls++;});
8484 }
8485 Assert(off <= pReNative->cInstrBufAlloc);
8486 Assert(pReNative->cCondDepth == 0);
8487
8488 /*
8489 * Advance.
8490 */
8491 pCallEntry++;
8492 }
8493
8494 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
8495 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
8496 if (!cThreadedCalls)
8497 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
8498
8499 /*
8500 * Emit the epilog code.
8501 */
8502 uint32_t idxReturnLabel;
8503 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
8504
8505 /*
8506 * Generate special jump labels.
8507 */
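        /* Only emit the shared tail code for label types that were actually requested,
           as tracked by the bmLabelTypes bitmap. */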
8508 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
8509 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
8510 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
8511 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
8512 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
8513 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
8514 }
8515 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
8516 {
8517 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
8518 return pTb;
8519 }
8520 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
8521 Assert(off <= pReNative->cInstrBufAlloc);
8522
8523 /*
8524      * Make sure all labels have been defined.
8525 */
8526 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
8527#ifdef VBOX_STRICT
8528 uint32_t const cLabels = pReNative->cLabels;
8529 for (uint32_t i = 0; i < cLabels; i++)
8530 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
8531#endif
8532
8533 /*
8534 * Allocate executable memory, copy over the code we've generated.
8535 */
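    /* Process any delayed TB frees first, presumably so the executable memory they hold
       can be reused for the allocation below. */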
8536 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
8537 if (pTbAllocator->pDelayedFreeHead)
8538 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
8539
8540 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
8541 AssertReturn(paFinalInstrBuf, pTb);
8542 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
8543
8544 /*
8545 * Apply fixups.
8546 */
8547 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
8548 uint32_t const cFixups = pReNative->cFixups;
8549 for (uint32_t i = 0; i < cFixups; i++)
8550 {
8551 Assert(paFixups[i].off < off);
8552 Assert(paFixups[i].idxLabel < cLabels);
8553 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
8554 switch (paFixups[i].enmType)
8555 {
8556#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
8557 case kIemNativeFixupType_Rel32:
8558 Assert(paFixups[i].off + 4 <= off);
8559 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8560 continue;
8561
8562#elif defined(RT_ARCH_ARM64)
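        /* The ARM64 fixups patch the branch displacement field of an already emitted
           instruction: imm26 at bit 0 (B/BL), imm19 at bit 5 (B.cond/CBZ/CBNZ) and imm14
           at bit 5 (TBZ/TBNZ).  Displacements are in units of 32-bit instructions. */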
8563 case kIemNativeFixupType_RelImm26At0:
8564 {
8565 Assert(paFixups[i].off < off);
8566 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8567 Assert(offDisp >= -262144 && offDisp < 262144);
8568 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
8569 continue;
8570 }
8571
8572 case kIemNativeFixupType_RelImm19At5:
8573 {
8574 Assert(paFixups[i].off < off);
8575 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8576 Assert(offDisp >= -262144 && offDisp < 262144);
8577 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
8578 continue;
8579 }
8580
8581 case kIemNativeFixupType_RelImm14At5:
8582 {
8583 Assert(paFixups[i].off < off);
8584 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8585 Assert(offDisp >= -8192 && offDisp < 8192);
8586 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
8587 continue;
8588 }
8589
8590#endif
8591 case kIemNativeFixupType_Invalid:
8592 case kIemNativeFixupType_End:
8593 break;
8594 }
8595 AssertFailed();
8596 }
8597
8598 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
8599 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
8600
8601 /*
8602 * Convert the translation block.
8603 */
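    /* Switch the TB from threaded to native: drop the threaded call table, hook up the
       final instruction buffer and retag the block type. */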
8604 RTMemFree(pTb->Thrd.paCalls);
8605 pTb->Native.paInstructions = paFinalInstrBuf;
8606 pTb->Native.cInstructions = off;
8607 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
8608#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8609     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
8610 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
8611#endif
8612
8613 Assert(pTbAllocator->cThreadedTbs > 0);
8614 pTbAllocator->cThreadedTbs -= 1;
8615 pTbAllocator->cNativeTbs += 1;
8616 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
8617
8618#ifdef LOG_ENABLED
8619 /*
8620 * Disassemble to the log if enabled.
8621 */
8622 if (LogIs3Enabled())
8623 {
8624 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
8625 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
8626# ifdef DEBUG_bird
8627 RTLogFlush(NULL);
8628# endif
8629 }
8630#endif
8631
8632 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
8633 return pTb;
8634}
8635