VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 102071

Last change on this file since 102071 was 102071, checked in by vboxsync, 15 months ago

VMM/IEM: Native translation of IEM_MC_CALC_RM_EFF_ADDR_THREADED_16. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 333.2 KB
Line 
1/* $Id: IEMAllN8veRecompiler.cpp 102071 2023-11-12 22:46:20Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incomaptible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs here.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128
129/*********************************************************************************************************************************
130* Defined Constants And Macros *
131*********************************************************************************************************************************/
132/** Always count instructions for now. */
133#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
134
135
136/*********************************************************************************************************************************
137* Internal Functions *
138*********************************************************************************************************************************/
139#ifdef VBOX_STRICT
140static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
141 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
142static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
143#endif
144#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
145static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
146static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
147#endif
148DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
149DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
150 IEMNATIVEGSTREG enmGstReg, uint32_t off);
151
152
153/*********************************************************************************************************************************
154* Executable Memory Allocator *
155*********************************************************************************************************************************/
/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
 * Use an alternative chunk sub-allocator that does not store internal data
 * in the chunk.
 *
 * Using the RTHeapSimple is not practical on newer darwin systems where
 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
 * memory. We would have to change the protection of the whole chunk for
 * every call to RTHeapSimple, which would be rather expensive.
 *
 * This alternative implementation lets us restrict page protection
 * modifications to the pages backing the executable memory we just allocated.
 */
168#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
169/** The chunk sub-allocation unit size in bytes. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
171/** The chunk sub-allocation unit size as a shift factor. */
172#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
173
174#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
175# ifdef IEMNATIVE_USE_GDB_JIT
176# define IEMNATIVE_USE_GDB_JIT_ET_DYN
177
/**
 * GDB JIT: Code entry.
 *
 * A list node describing one in-memory symbol file; GDBJITDESCRIPTOR holds
 * pointers to entries of this type.
 */
typedef struct GDBJITCODEENTRY
{
    /** The next entry. */
    struct GDBJITCODEENTRY *pNext;
    /** The previous entry. */
    struct GDBJITCODEENTRY *pPrev;
    /** Pointer to the symbol file bytes. */
    uint8_t                *pbSymFile;
    /** Size of the symbol file in bytes. */
    uint64_t                cbSymFile;
} GDBJITCODEENTRY;
186
/**
 * GDB JIT: Actions.
 *
 * Value type of GDBJITDESCRIPTOR::enmAction, stored as a 32-bit unsigned integer.
 */
typedef enum GDBJITACTIONS : uint32_t
{
    /** Nothing to do. */
    kGdbJitaction_NoAction = 0,
    /** A code entry is being registered. */
    kGdbJitaction_Register,
    /** A code entry is being unregistered. */
    kGdbJitaction_Unregister
} GDBJITACTIONS;
192
193/** GDB JIT: Descriptor. */
194typedef struct GDBJITDESCRIPTOR
195{
196 uint32_t uVersion;
197 GDBJITACTIONS enmAction;
198 GDBJITCODEENTRY *pRelevant;
199 GDBJITCODEENTRY *pHead;
200 /** Our addition: */
201 GDBJITCODEENTRY *pTail;
202} GDBJITDESCRIPTOR;
203
204/** GDB JIT: Our simple symbol file data. */
205typedef struct GDBJITSYMFILE
206{
207 Elf64_Ehdr EHdr;
208# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
209 Elf64_Shdr aShdrs[5];
210# else
211 Elf64_Shdr aShdrs[7];
212 Elf64_Phdr aPhdrs[2];
213# endif
214 /** The dwarf ehframe data for the chunk. */
215 uint8_t abEhFrame[512];
216 char szzStrTab[128];
217 Elf64_Sym aSymbols[3];
218# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
219 Elf64_Sym aDynSyms[2];
220 Elf64_Dyn aDyn[6];
221# endif
222} GDBJITSYMFILE;
223
224extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
225extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
226
227/** Init once for g_IemNativeGdbJitLock. */
228static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
229/** Init once for the critical section. */
230static RTCRITSECT g_IemNativeGdbJitLock;
231
232/** GDB reads the info here. */
233GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
234
235/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
236DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
237{
238 ASMNopPause();
239}
240
241/** @callback_method_impl{FNRTONCE} */
242static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
243{
244 RT_NOREF(pvUser);
245 return RTCritSectInit(&g_IemNativeGdbJitLock);
246}
247
248
249# endif /* IEMNATIVE_USE_GDB_JIT */
250
/**
 * Per-chunk unwind info for non-windows hosts.
 *
 * One of these exists per executable memory chunk, see
 * IEMEXECMEMALLOCATOR::paEhFrames.
 */
typedef struct IEMEXECMEMCHUNKEHFRAME
{
# ifdef IEMNATIVE_USE_LIBUNWIND
    /** The offset of the FDA into abEhFrame.
     * @todo "FDA" presumably means the DWARF FDE - confirm. */
    uintptr_t               offFda;
# else
    /** 'struct object' storage area.
     * @note Presumably what gets passed as pvObj to __register_frame_info;
     *       the registration code is outside this excerpt. */
    uint8_t                 abObject[1024];
# endif
# ifdef IEMNATIVE_USE_GDB_JIT
#  if 0
    /** The GDB JIT 'symbol file' data. */
    GDBJITSYMFILE           GdbJitSymFile;
#  endif
    /** The GDB JIT list entry. */
    GDBJITCODEENTRY         GdbJitEntry;
# endif
    /** The dwarf ehframe data for the chunk. */
    uint8_t                 abEhFrame[512];
} IEMEXECMEMCHUNKEHFRAME;
/** Pointer to per-chunk unwind info for non-windows hosts. */
typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
276#endif
277
278
279/**
280 * An chunk of executable memory.
281 */
282typedef struct IEMEXECMEMCHUNK
283{
284#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
285 /** Number of free items in this chunk. */
286 uint32_t cFreeUnits;
287 /** Hint were to start searching for free space in the allocation bitmap. */
288 uint32_t idxFreeHint;
289#else
290 /** The heap handle. */
291 RTHEAPSIMPLE hHeap;
292#endif
293 /** Pointer to the chunk. */
294 void *pvChunk;
295#ifdef IN_RING3
296 /**
297 * Pointer to the unwind information.
298 *
299 * This is used during C++ throw and longjmp (windows and probably most other
300 * platforms). Some debuggers (windbg) makes use of it as well.
301 *
302 * Windows: This is allocated from hHeap on windows because (at least for
303 * AMD64) the UNWIND_INFO structure address in the
304 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
305 *
306 * Others: Allocated from the regular heap to avoid unnecessary executable data
307 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
308 void *pvUnwindInfo;
309#elif defined(IN_RING0)
310 /** Allocation handle. */
311 RTR0MEMOBJ hMemObj;
312#endif
313} IEMEXECMEMCHUNK;
314/** Pointer to a memory chunk. */
315typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
316
317
318/**
319 * Executable memory allocator for the native recompiler.
320 */
321typedef struct IEMEXECMEMALLOCATOR
322{
323 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
324 uint32_t uMagic;
325
326 /** The chunk size. */
327 uint32_t cbChunk;
328 /** The maximum number of chunks. */
329 uint32_t cMaxChunks;
330 /** The current number of chunks. */
331 uint32_t cChunks;
332 /** Hint where to start looking for available memory. */
333 uint32_t idxChunkHint;
334 /** Statistics: Current number of allocations. */
335 uint32_t cAllocations;
336
337 /** The total amount of memory available. */
338 uint64_t cbTotal;
339 /** Total amount of free memory. */
340 uint64_t cbFree;
341 /** Total amount of memory allocated. */
342 uint64_t cbAllocated;
343
344#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
345 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
346 *
347 * Since the chunk size is a power of two and the minimum chunk size is a lot
348 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
349 * require a whole number of uint64_t elements in the allocation bitmap. So,
350 * for sake of simplicity, they are allocated as one continous chunk for
351 * simplicity/laziness. */
352 uint64_t *pbmAlloc;
353 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
354 uint32_t cUnitsPerChunk;
355 /** Number of bitmap elements per chunk (for quickly locating the bitmap
356 * portion corresponding to an chunk). */
357 uint32_t cBitmapElementsPerChunk;
358#else
359 /** @name Tweaks to get 64 byte aligned allocats w/o unnecessary fragmentation.
360 * @{ */
361 /** The size of the heap internal block header. This is used to adjust the
362 * request memory size to make sure there is exacly enough room for a header at
363 * the end of the blocks we allocate before the next 64 byte alignment line. */
364 uint32_t cbHeapBlockHdr;
365 /** The size of initial heap allocation required make sure the first
366 * allocation is correctly aligned. */
367 uint32_t cbHeapAlignTweak;
368 /** The alignment tweak allocation address. */
369 void *pvAlignTweak;
370 /** @} */
371#endif
372
373#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
374 /** Pointer to the array of unwind info running parallel to aChunks (same
375 * allocation as this structure, located after the bitmaps).
376 * (For Windows, the structures must reside in 32-bit RVA distance to the
377 * actual chunk, so they are allocated off the chunk.) */
378 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
379#endif
380
381 /** The allocation chunks. */
382 RT_FLEXIBLE_ARRAY_EXTENSION
383 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
384} IEMEXECMEMALLOCATOR;
385/** Pointer to an executable memory allocator. */
386typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
387
388/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
389#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
390
391
392static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
393
394
395/**
396 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
397 * the heap statistics.
398 */
399static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
400 uint32_t cbReq, uint32_t idxChunk)
401{
402 pExecMemAllocator->cAllocations += 1;
403 pExecMemAllocator->cbAllocated += cbReq;
404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
405 pExecMemAllocator->cbFree -= cbReq;
406#else
407 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
408#endif
409 pExecMemAllocator->idxChunkHint = idxChunk;
410
411#ifdef RT_OS_DARWIN
412 /*
413 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
414 * on darwin. So, we mark the pages returned as read+write after alloc and
415 * expect the caller to call iemExecMemAllocatorReadyForUse when done
416 * writing to the allocation.
417 *
418 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
419 * for details.
420 */
421 /** @todo detect if this is necessary... it wasn't required on 10.15 or
422 * whatever older version it was. */
423 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
424 AssertRC(rc);
425#endif
426
427 return pvRet;
428}
429
430
431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
432static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
433 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
434{
435 /*
436 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
437 */
438 Assert(!(cToScan & 63));
439 Assert(!(idxFirst & 63));
440 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
441 pbmAlloc += idxFirst / 64;
442
443 /*
444 * Scan the bitmap for cReqUnits of consequtive clear bits
445 */
446 /** @todo This can probably be done more efficiently for non-x86 systems. */
447 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
448 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
449 {
450 uint32_t idxAddBit = 1;
451 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
452 idxAddBit++;
453 if (idxAddBit >= cReqUnits)
454 {
455 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
456
457 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
458 pChunk->cFreeUnits -= cReqUnits;
459 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
460
461 void * const pvRet = (uint8_t *)pChunk->pvChunk
462 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
463
464 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
465 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
466 }
467
468 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
469 }
470 return NULL;
471}
472#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
473
474
475static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
476{
477#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
478 /*
479 * Figure out how much to allocate.
480 */
481 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
482 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
483 {
484 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
485 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
486 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
487 {
488 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
489 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
490 if (pvRet)
491 return pvRet;
492 }
493 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
494 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
495 cReqUnits, idxChunk);
496 }
497#else
498 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
499 if (pvRet)
500 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
501#endif
502 return NULL;
503
504}
505
506
507/**
508 * Allocates @a cbReq bytes of executable memory.
509 *
510 * @returns Pointer to the memory, NULL if out of memory or other problem
511 * encountered.
512 * @param pVCpu The cross context virtual CPU structure of the calling
513 * thread.
514 * @param cbReq How many bytes are required.
515 */
516static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
517{
518 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
519 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
520 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
521
522 /*
523 * Adjust the request size so it'll fit the allocator alignment/whatnot.
524 *
525 * For the RTHeapSimple allocator this means to follow the logic described
526 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
527 * existing chunks if we think we've got sufficient free memory around.
528 *
529 * While for the alternative one we just align it up to a whole unit size.
530 */
531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
532 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
533#else
534 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
535#endif
536 if (cbReq <= pExecMemAllocator->cbFree)
537 {
538 uint32_t const cChunks = pExecMemAllocator->cChunks;
539 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
540 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
541 {
542 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
543 if (pvRet)
544 return pvRet;
545 }
546 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
547 {
548 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
549 if (pvRet)
550 return pvRet;
551 }
552 }
553
554 /*
555 * Can we grow it with another chunk?
556 */
557 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
558 {
559 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
560 AssertLogRelRCReturn(rc, NULL);
561
562 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
563 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
564 if (pvRet)
565 return pvRet;
566 AssertFailed();
567 }
568
569 /* What now? Prune native translation blocks from the cache? */
570 AssertFailed();
571 return NULL;
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and but mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the the stack variable
698 * area can can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DwARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo we we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this PDE. */
952# if 0 /* not requried for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symbols */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .symbols */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which mean 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above up: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocatore instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while (cbInitial < (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#GP(0).
1583 */
1584IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1588#ifndef _MSC_VER
1589 return VINF_IEM_RAISED_XCPT; /* not reached */
1590#endif
1591}
1592
1593
1594/**
1595 * Reinitializes the native recompiler state.
1596 *
1597 * Called before starting a new recompile job.
1598 */
1599static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1600{
1601 pReNative->cLabels = 0;
1602 pReNative->bmLabelTypes = 0;
1603 pReNative->cFixups = 0;
1604#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1605 pReNative->pDbgInfo->cEntries = 0;
1606#endif
1607 pReNative->pTbOrg = pTb;
1608 pReNative->cCondDepth = 0;
1609 pReNative->uCondSeqNo = 0;
1610 pReNative->uCheckIrqSeqNo = 0;
1611
1612 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1613#if IEMNATIVE_HST_GREG_COUNT < 32
1614 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1615#endif
1616 ;
1617 pReNative->Core.bmHstRegsWithGstShadow = 0;
1618 pReNative->Core.bmGstRegShadows = 0;
1619 pReNative->Core.bmVars = 0;
1620 pReNative->Core.bmStack = 0;
1621 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1622 pReNative->Core.u64ArgVars = UINT64_MAX;
1623
1624 /* Full host register reinit: */
1625 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1626 {
1627 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1628 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1629 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1630 }
1631
1632 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1633 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1634#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1635 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1636#endif
1637#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1638 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1639#endif
1640 );
1641 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1642 {
1643 fRegs &= ~RT_BIT_32(idxReg);
1644 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_FixedReserved;
1645 }
1646
1647 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1648#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1649 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1650#endif
1651#ifdef IEMNATIVE_REG_FIXED_TMP0
1652 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1653#endif
1654 return pReNative;
1655}
1656
1657
1658/**
1659 * Allocates and initializes the native recompiler state.
1660 *
1661 * This is called the first time an EMT wants to recompile something.
1662 *
1663 * @returns Pointer to the new recompiler state.
1664 * @param pVCpu The cross context virtual CPU structure of the calling
1665 * thread.
1666 * @param pTb The TB that's about to be recompiled.
1667 * @thread EMT(pVCpu)
1668 */
1669static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1670{
1671 VMCPU_ASSERT_EMT(pVCpu);
1672
1673 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1674 AssertReturn(pReNative, NULL);
1675
1676 /*
1677 * Try allocate all the buffers and stuff we need.
1678 */
1679 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1680 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1681 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1682#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1683 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1684#endif
1685 if (RT_LIKELY( pReNative->pInstrBuf
1686 && pReNative->paLabels
1687 && pReNative->paFixups)
1688#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1689 && pReNative->pDbgInfo
1690#endif
1691 )
1692 {
1693 /*
1694 * Set the buffer & array sizes on success.
1695 */
1696 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1697 pReNative->cLabelsAlloc = _8K;
1698 pReNative->cFixupsAlloc = _16K;
1699#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1700 pReNative->cDbgInfoAlloc = _16K;
1701#endif
1702
1703 /*
1704 * Done, just need to save it and reinit it.
1705 */
1706 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1707 return iemNativeReInit(pReNative, pTb);
1708 }
1709
1710 /*
1711 * Failed. Cleanup and return.
1712 */
1713 AssertFailed();
1714 RTMemFree(pReNative->pInstrBuf);
1715 RTMemFree(pReNative->paLabels);
1716 RTMemFree(pReNative->paFixups);
1717#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1718 RTMemFree(pReNative->pDbgInfo);
1719#endif
1720 RTMemFree(pReNative);
1721 return NULL;
1722}
1723
1724
1725/**
1726 * Creates a label
1727 *
1728 * If the label does not yet have a defined position,
1729 * call iemNativeLabelDefine() later to set it.
1730 *
1731 * @returns Label ID. Throws VBox status code on failure, so no need to check
1732 * the return value.
1733 * @param pReNative The native recompile state.
1734 * @param enmType The label type.
1735 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1736 * label is not yet defined (default).
1737 * @param uData Data associated with the lable. Only applicable to
1738 * certain type of labels. Default is zero.
1739 */
1740DECL_HIDDEN_THROW(uint32_t)
1741iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1742 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1743{
1744 /*
1745 * Locate existing label definition.
1746 *
1747 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1748 * and uData is zero.
1749 */
1750 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1751 uint32_t const cLabels = pReNative->cLabels;
1752 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1753#ifndef VBOX_STRICT
1754 && offWhere == UINT32_MAX
1755 && uData == 0
1756#endif
1757 )
1758 {
1759 /** @todo Since this is only used for labels with uData = 0, just use a
1760 * lookup array? */
1761 for (uint32_t i = 0; i < cLabels; i++)
1762 if ( paLabels[i].enmType == enmType
1763 && paLabels[i].uData == uData)
1764 {
1765#ifdef VBOX_STRICT
1766 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1767 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1768#endif
1769 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1770 return i;
1771 }
1772 }
1773
1774 /*
1775 * Make sure we've got room for another label.
1776 */
1777 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1778 { /* likely */ }
1779 else
1780 {
1781 uint32_t cNew = pReNative->cLabelsAlloc;
1782 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1783 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1784 cNew *= 2;
1785 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
1786 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1787 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1788 pReNative->paLabels = paLabels;
1789 pReNative->cLabelsAlloc = cNew;
1790 }
1791
1792 /*
1793 * Define a new label.
1794 */
1795 paLabels[cLabels].off = offWhere;
1796 paLabels[cLabels].enmType = enmType;
1797 paLabels[cLabels].uData = uData;
1798 pReNative->cLabels = cLabels + 1;
1799
1800 Assert((unsigned)enmType < 64);
1801 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1802
1803 if (offWhere != UINT32_MAX)
1804 {
1805#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1806 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1807 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1808#endif
1809 }
1810 return cLabels;
1811}
1812
1813
1814/**
1815 * Defines the location of an existing label.
1816 *
1817 * @param pReNative The native recompile state.
1818 * @param idxLabel The label to define.
1819 * @param offWhere The position.
1820 */
1821DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1822{
1823 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1824 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1825 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1826 pLabel->off = offWhere;
1827#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1828 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1829 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1830#endif
1831}
1832
1833
1834/**
1835 * Looks up a lable.
1836 *
1837 * @returns Label ID if found, UINT32_MAX if not.
1838 */
1839static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1840 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1841{
1842 Assert((unsigned)enmType < 64);
1843 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1844 {
1845 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1846 uint32_t const cLabels = pReNative->cLabels;
1847 for (uint32_t i = 0; i < cLabels; i++)
1848 if ( paLabels[i].enmType == enmType
1849 && paLabels[i].uData == uData
1850 && ( paLabels[i].off == offWhere
1851 || offWhere == UINT32_MAX
1852 || paLabels[i].off == UINT32_MAX))
1853 return i;
1854 }
1855 return UINT32_MAX;
1856}
1857
1858
1859/**
1860 * Adds a fixup.
1861 *
1862 * @throws VBox status code (int) on failure.
1863 * @param pReNative The native recompile state.
1864 * @param offWhere The instruction offset of the fixup location.
1865 * @param idxLabel The target label ID for the fixup.
1866 * @param enmType The fixup type.
1867 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1868 */
1869DECL_HIDDEN_THROW(void)
1870iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1871 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1872{
1873 Assert(idxLabel <= UINT16_MAX);
1874 Assert((unsigned)enmType <= UINT8_MAX);
1875
1876 /*
1877 * Make sure we've room.
1878 */
1879 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1880 uint32_t const cFixups = pReNative->cFixups;
1881 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1882 { /* likely */ }
1883 else
1884 {
1885 uint32_t cNew = pReNative->cFixupsAlloc;
1886 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1887 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1888 cNew *= 2;
1889 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1890 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1891 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1892 pReNative->paFixups = paFixups;
1893 pReNative->cFixupsAlloc = cNew;
1894 }
1895
1896 /*
1897 * Add the fixup.
1898 */
1899 paFixups[cFixups].off = offWhere;
1900 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1901 paFixups[cFixups].enmType = enmType;
1902 paFixups[cFixups].offAddend = offAddend;
1903 pReNative->cFixups = cFixups + 1;
1904}
1905
1906
1907/**
1908 * Slow code path for iemNativeInstrBufEnsure.
1909 */
1910DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1911{
1912 /* Double the buffer size till we meet the request. */
1913 uint32_t cNew = pReNative->cInstrBufAlloc;
1914 AssertReturn(cNew > 0, NULL);
1915 do
1916 cNew *= 2;
1917 while (cNew < off + cInstrReq);
1918
1919 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1920#ifdef RT_ARCH_ARM64
1921 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1922#else
1923 uint32_t const cbMaxInstrBuf = _2M;
1924#endif
1925 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1926
1927 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1928 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1929
1930 pReNative->cInstrBufAlloc = cNew;
1931 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1932}
1933
1934#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1935
1936/**
1937 * Grows the static debug info array used during recompilation.
1938 *
1939 * @returns Pointer to the new debug info block; throws VBox status code on
1940 * failure, so no need to check the return value.
1941 */
1942DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1943{
1944 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1945 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1946 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1947 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1948 pReNative->pDbgInfo = pDbgInfo;
1949 pReNative->cDbgInfoAlloc = cNew;
1950 return pDbgInfo;
1951}
1952
1953
1954/**
1955 * Adds a new debug info uninitialized entry, returning the pointer to it.
1956 */
1957DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1958{
1959 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1960 { /* likely */ }
1961 else
1962 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1963 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1964}
1965
1966
1967/**
1968 * Debug Info: Adds a native offset record, if necessary.
1969 */
1970static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1971{
1972 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1973
1974 /*
1975 * Search backwards to see if we've got a similar record already.
1976 */
1977 uint32_t idx = pDbgInfo->cEntries;
1978 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1979 while (idx-- > idxStop)
1980 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1981 {
1982 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1983 return;
1984 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1985 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1986 break;
1987 }
1988
1989 /*
1990 * Add it.
1991 */
1992 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1993 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1994 pEntry->NativeOffset.offNative = off;
1995}
1996
1997
1998/**
1999 * Debug Info: Record info about a label.
2000 */
2001static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2002{
2003 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2004 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2005 pEntry->Label.uUnused = 0;
2006 pEntry->Label.enmLabel = (uint8_t)enmType;
2007 pEntry->Label.uData = uData;
2008}
2009
2010
2011/**
2012 * Debug Info: Record info about a threaded call.
2013 */
2014static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2015{
2016 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2017 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2018 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2019 pEntry->ThreadedCall.uUnused = 0;
2020 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2021}
2022
2023
2024/**
2025 * Debug Info: Record info about a new guest instruction.
2026 */
2027static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2028{
2029 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2030 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2031 pEntry->GuestInstruction.uUnused = 0;
2032 pEntry->GuestInstruction.fExec = fExec;
2033}
2034
2035
2036/**
2037 * Debug Info: Record info about guest register shadowing.
2038 */
2039static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2040 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2041{
2042 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2043 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2044 pEntry->GuestRegShadowing.uUnused = 0;
2045 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2046 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2047 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2048}
2049
2050#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2051
2052
2053/*********************************************************************************************************************************
2054* Register Allocator *
2055*********************************************************************************************************************************/
2056
2057/**
2058 * Register parameter indexes (indexed by argument number).
2059 */
2060DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2061{
2062 IEMNATIVE_CALL_ARG0_GREG,
2063 IEMNATIVE_CALL_ARG1_GREG,
2064 IEMNATIVE_CALL_ARG2_GREG,
2065 IEMNATIVE_CALL_ARG3_GREG,
2066#if defined(IEMNATIVE_CALL_ARG4_GREG)
2067 IEMNATIVE_CALL_ARG4_GREG,
2068# if defined(IEMNATIVE_CALL_ARG5_GREG)
2069 IEMNATIVE_CALL_ARG5_GREG,
2070# if defined(IEMNATIVE_CALL_ARG6_GREG)
2071 IEMNATIVE_CALL_ARG6_GREG,
2072# if defined(IEMNATIVE_CALL_ARG7_GREG)
2073 IEMNATIVE_CALL_ARG7_GREG,
2074# endif
2075# endif
2076# endif
2077#endif
2078};
2079
2080/**
2081 * Call register masks indexed by argument count.
2082 */
2083DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2084{
2085 0,
2086 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2087 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2088 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2089 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2090 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2091#if defined(IEMNATIVE_CALL_ARG4_GREG)
2092 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2093 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2094# if defined(IEMNATIVE_CALL_ARG5_GREG)
2095 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2096 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2097# if defined(IEMNATIVE_CALL_ARG6_GREG)
2098 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2099 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2100 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2101# if defined(IEMNATIVE_CALL_ARG7_GREG)
2102 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2103 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2104 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2105# endif
2106# endif
2107# endif
2108#endif
2109};
2110
#if defined(IEMNATIVE_FP_OFF_STACK_ARG0)
/**
 * Frame pointer (BP) relative offsets of the stack argument slots.
 *
 * Index this array with \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT.  It has
 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries (checked at compile time below).
 */
DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
{
    IEMNATIVE_FP_OFF_STACK_ARG0,
# if defined(IEMNATIVE_FP_OFF_STACK_ARG1)
    IEMNATIVE_FP_OFF_STACK_ARG1,
# endif
# if defined(IEMNATIVE_FP_OFF_STACK_ARG2)
    IEMNATIVE_FP_OFF_STACK_ARG2,
# endif
# if defined(IEMNATIVE_FP_OFF_STACK_ARG3)
    IEMNATIVE_FP_OFF_STACK_ARG3,
# endif
};
AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2133
2134/**
2135 * Info about shadowed guest register values.
2136 * @see IEMNATIVEGSTREG
2137 */
2138static struct
2139{
2140 /** Offset in VMCPU. */
2141 uint32_t off;
2142 /** The field size. */
2143 uint8_t cb;
2144 /** Name (for logging). */
2145 const char *pszName;
2146} const g_aGstShadowInfo[] =
2147{
2148#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2151 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2152 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2153 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2154 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2155 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2156 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2157 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2158 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2159 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2160 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2161 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2162 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2163 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2164 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2165 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2166 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2167 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2168 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2169 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2170 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2171 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2172 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2173 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2174 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2175 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2176 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2177 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2178 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2179 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2180 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2181 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2182 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2183 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2184 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2185#undef CPUMCTX_OFF_AND_SIZE
2186};
2187AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2188
2189
2190/** Host CPU general purpose register names. */
2191DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2192{
2193#ifdef RT_ARCH_AMD64
2194 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2195#elif RT_ARCH_ARM64
2196 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2197 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2198#else
2199# error "port me"
2200#endif
2201};
2202
2203
2204DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2205 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2206{
2207 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2208
2209 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2210 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2211 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2212 return (uint8_t)idxReg;
2213}
2214
2215
2216/**
2217 * Tries to locate a suitable register in the given register mask.
2218 *
2219 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2220 * failed.
2221 *
2222 * @returns Host register number on success, returns UINT8_MAX on failure.
2223 */
2224static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2225{
2226 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2227 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2228 if (fRegs)
2229 {
2230 /** @todo pick better here: */
2231 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2232
2233 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2234 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2235 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2236 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2237
2238 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2239 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2240 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2241 return idxReg;
2242 }
2243 return UINT8_MAX;
2244}
2245
2246
2247/**
2248 * Locate a register, possibly freeing one up.
2249 *
2250 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2251 * failed.
2252 *
2253 * @returns Host register number on success. Returns UINT8_MAX if no registers
2254 * found, the caller is supposed to deal with this and raise a
2255 * allocation type specific status code (if desired).
2256 *
2257 * @throws VBox status code if we're run into trouble spilling a variable of
2258 * recording debug info. Does NOT throw anything if we're out of
2259 * registers, though.
2260 */
2261static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2262 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2263{
2264 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2265 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2266
2267 /*
2268 * Try a freed register that's shadowing a guest register
2269 */
2270 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2271 if (fRegs)
2272 {
2273 unsigned const idxReg = (fPreferVolatile
2274 ? ASMBitFirstSetU32(fRegs)
2275 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2276 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2277 - 1;
2278
2279 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2280 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2281 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2282 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2283
2284 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2285 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2286 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2287 return idxReg;
2288 }
2289
2290 /*
2291 * Try free up a variable that's in a register.
2292 *
2293 * We do two rounds here, first evacuating variables we don't need to be
2294 * saved on the stack, then in the second round move things to the stack.
2295 */
2296 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2297 {
2298 uint32_t fVars = pReNative->Core.bmVars;
2299 while (fVars)
2300 {
2301 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2302 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2303 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2304 && (RT_BIT_32(idxReg) & fRegMask)
2305 && ( iLoop == 0
2306 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2307 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2308 {
2309 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2310 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2311 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2312 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2313 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2314 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2315
2316 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2317 {
2318 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
2319 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2320 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeVarCalcBpDisp(pReNative, idxVar), idxReg);
2321 }
2322
2323 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2324 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2325 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2326 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2327 return idxReg;
2328 }
2329 fVars &= ~RT_BIT_32(idxVar);
2330 }
2331 }
2332
2333 return UINT8_MAX;
2334}
2335
2336
2337/**
2338 * Moves a variable to a different register or spills it onto the stack.
2339 *
2340 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2341 * kinds can easily be recreated if needed later.
2342 *
2343 * @returns The new code buffer position, UINT32_MAX on failure.
2344 * @param pReNative The native recompile state.
2345 * @param off The current code buffer position.
2346 * @param idxVar The variable index.
2347 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2348 * call-volatile registers.
2349 */
2350static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2351 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2352{
2353 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2354 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2355
2356 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2357 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2358 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2359 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2360 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2361 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2362 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2363 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2364 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2365
2366
2367 /** @todo Add statistics on this.*/
2368 /** @todo Implement basic variable liveness analysis (python) so variables
2369 * can be freed immediately once no longer used. This has the potential to
2370 * be trashing registers and stack for dead variables. */
2371
2372 /*
2373 * First try move it to a different register, as that's cheaper.
2374 */
2375 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2376 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2377 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2378 if (fRegs)
2379 {
2380 /* Avoid using shadow registers, if possible. */
2381 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2382 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2383 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2384 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2385
2386 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2387 Log12(("iemNativeRegMoveOrSpillStackVar: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
2388 idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2389 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2390 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2391 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2392 if (fGstRegShadows)
2393 {
2394 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2395 while (fGstRegShadows)
2396 {
2397 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2398 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2399
2400 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2401 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2402 }
2403 }
2404
2405 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2406 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2407 }
2408 /*
2409 * Otherwise we must spill the register onto the stack.
2410 */
2411 else
2412 {
2413 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
2414 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
2415 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
2416 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2417 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
2418
2419 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2420 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2421 }
2422
2423 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2424 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2425 return off;
2426}
2427
2428
2429/**
2430 * Allocates a temporary host general purpose register.
2431 *
2432 * This may emit code to save register content onto the stack in order to free
2433 * up a register.
2434 *
2435 * @returns The host register number; throws VBox status code on failure,
2436 * so no need to check the return value.
2437 * @param pReNative The native recompile state.
2438 * @param poff Pointer to the variable with the code buffer position.
2439 * This will be update if we need to move a variable from
2440 * register to stack in order to satisfy the request.
2441 * @param fPreferVolatile Wheter to prefer volatile over non-volatile
2442 * registers (@c true, default) or the other way around
2443 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2444 */
2445DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2446{
2447 /*
2448 * Try find a completely unused register, preferably a call-volatile one.
2449 */
2450 uint8_t idxReg;
2451 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2452 & ~pReNative->Core.bmHstRegsWithGstShadow
2453 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2454 if (fRegs)
2455 {
2456 if (fPreferVolatile)
2457 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2458 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2459 else
2460 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2461 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2462 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2463 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2464 }
2465 else
2466 {
2467 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2468 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2469 }
2470 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2471}
2472
2473
2474/**
2475 * Allocates a temporary register for loading an immediate value into.
2476 *
2477 * This will emit code to load the immediate, unless there happens to be an
2478 * unused register with the value already loaded.
2479 *
2480 * The caller will not modify the returned register, it must be considered
2481 * read-only. Free using iemNativeRegFreeTmpImm.
2482 *
2483 * @returns The host register number; throws VBox status code on failure, so no
2484 * need to check the return value.
2485 * @param pReNative The native recompile state.
2486 * @param poff Pointer to the variable with the code buffer position.
2487 * @param uImm The immediate value that the register must hold upon
2488 * return.
2489 * @param fPreferVolatile Wheter to prefer volatile over non-volatile
2490 * registers (@c true, default) or the other way around
2491 * (@c false).
2492 *
2493 * @note Reusing immediate values has not been implemented yet.
2494 */
2495DECL_HIDDEN_THROW(uint8_t)
2496iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2497{
2498 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2499 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2500 return idxReg;
2501}
2502
2503
2504/**
2505 * Marks host register @a idxHstReg as containing a shadow copy of guest
2506 * register @a enmGstReg.
2507 *
2508 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2509 * host register before calling.
2510 */
2511DECL_FORCE_INLINE(void)
2512iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2513{
2514 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2515 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2516
2517 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2518 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2519 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2520 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2521#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2522 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2523 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2524#else
2525 RT_NOREF(off);
2526#endif
2527}
2528
2529
2530/**
2531 * Clear any guest register shadow claims from @a idxHstReg.
2532 *
2533 * The register does not need to be shadowing any guest registers.
2534 */
2535DECL_FORCE_INLINE(void)
2536iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2537{
2538 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2539 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
2540 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2541 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2542 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2543
2544#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2545 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2546 if (fGstRegs)
2547 {
2548 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
2549 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2550 while (fGstRegs)
2551 {
2552 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2553 fGstRegs &= ~RT_BIT_64(iGstReg);
2554 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2555 }
2556 }
2557#else
2558 RT_NOREF(off);
2559#endif
2560
2561 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2562 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2563 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2564}
2565
2566
2567/**
2568 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
2569 * and global overview flags.
2570 */
2571DECL_FORCE_INLINE(void)
2572iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2573{
2574 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2575 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2576 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
2577 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2578 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
2579 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2580 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2581
2582#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2583 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2584 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
2585#else
2586 RT_NOREF(off);
2587#endif
2588
2589 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
2590 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
2591 if (!fGstRegShadowsNew)
2592 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2593 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
2594}
2595
2596
2597/**
2598 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2599 * to @a idxRegTo.
2600 */
2601DECL_FORCE_INLINE(void)
2602iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2603 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2604{
2605 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2606 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2607 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
2608 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2609 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
2610 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
2611 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2612 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2613
2614 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
2615 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
2616 if (!fGstRegShadowsFrom)
2617 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
2618 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
2619 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
2620 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2621#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2622 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2623 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2624#else
2625 RT_NOREF(off);
2626#endif
2627}
2628
2629
2630/**
2631 * Allocates a temporary host general purpose register for keeping a guest
2632 * register value.
2633 *
2634 * Since we may already have a register holding the guest register value,
2635 * code will be emitted to do the loading if that's not the case. Code may also
2636 * be emitted if we have to free up a register to satify the request.
2637 *
2638 * @returns The host register number; throws VBox status code on failure, so no
2639 * need to check the return value.
2640 * @param pReNative The native recompile state.
2641 * @param poff Pointer to the variable with the code buffer
2642 * position. This will be update if we need to move a
2643 * variable from register to stack in order to satisfy
2644 * the request.
2645 * @param enmGstReg The guest register that will is to be updated.
2646 * @param enmIntendedUse How the caller will be using the host register.
2647 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2648 */
2649DECL_HIDDEN_THROW(uint8_t)
2650iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2651 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2652{
2653 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2654#ifdef LOG_ENABLED
2655 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
2656#endif
2657
2658 /*
2659 * First check if the guest register value is already in a host register.
2660 */
2661 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2662 {
2663 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2664 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2665 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2666 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2667
2668 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2669 {
2670 /*
2671 * If the register will trash the guest shadow copy, try find a
2672 * completely unused register we can use instead. If that fails,
2673 * we need to disassociate the host reg from the guest reg.
2674 */
2675 /** @todo would be nice to know if preserving the register is in any way helpful. */
2676 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2677 && ( ~pReNative->Core.bmHstRegs
2678 & ~pReNative->Core.bmHstRegsWithGstShadow
2679 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2680 {
2681 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2682
2683 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2684
2685 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2686 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2687 g_apszIemNativeHstRegNames[idxRegNew]));
2688 idxReg = idxRegNew;
2689 }
2690 else
2691 {
2692 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2693 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2694 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2695 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2696 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2697 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2698 else
2699 {
2700 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2701 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2702 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2703 }
2704 }
2705 }
2706 else
2707 {
2708 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
2709 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
2710 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
2711 idxReg, s_pszIntendedUse[enmIntendedUse]));
2712
2713 /*
2714 * Allocate a new register, copy the value and, if updating, the
2715 * guest shadow copy assignment to the new register.
2716 */
2717 /** @todo share register for readonly access. */
2718 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2719
2720 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2721 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2722
2723 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
2724 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2725 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2726 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2727 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2728 else
2729 {
2730 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2731 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
2732 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2733 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2734 }
2735 idxReg = idxRegNew;
2736 }
2737
2738#ifdef VBOX_STRICT
2739 /* Strict builds: Check that the value is correct. */
2740 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2741#endif
2742
2743 return idxReg;
2744 }
2745
2746 /*
2747 * Allocate a new register, load it with the guest value and designate it as a copy of the
2748 */
2749 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2750
2751 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2752 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2753
2754 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2755 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2756 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2757 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2758
2759 return idxRegNew;
2760}
2761
2762
2763/**
2764 * Allocates a temporary host general purpose register that already holds the
2765 * given guest register value.
2766 *
2767 * The use case for this function is places where the shadowing state cannot be
2768 * modified due to branching and such. This will fail if the we don't have a
2769 * current shadow copy handy or if it's incompatible. The only code that will
2770 * be emitted here is value checking code in strict builds.
2771 *
2772 * The intended use can only be readonly!
2773 *
2774 * @returns The host register number, UINT8_MAX if not present.
2775 * @param pReNative The native recompile state.
2776 * @param poff Pointer to the instruction buffer offset.
2777 * Will be updated in strict builds if a register is
2778 * found.
2779 * @param enmGstReg The guest register that will is to be updated.
2780 * @note In strict builds, this may throw instruction buffer growth failures.
2781 * Non-strict builds will not throw anything.
2782 * @sa iemNativeRegAllocTmpForGuestReg
2783 */
2784DECL_HIDDEN_THROW(uint8_t)
2785iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2786{
2787 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2788
2789 /*
2790 * First check if the guest register value is already in a host register.
2791 */
2792 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2793 {
2794 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2795 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2796 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2797 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2798
2799 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2800 {
2801 /*
2802 * We only do readonly use here, so easy compared to the other
2803 * variant of this code.
2804 */
2805 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2806 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2807 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2808 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2809 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2810
2811#ifdef VBOX_STRICT
2812 /* Strict builds: Check that the value is correct. */
2813 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2814#else
2815 RT_NOREF(poff);
2816#endif
2817 return idxReg;
2818 }
2819 }
2820
2821 return UINT8_MAX;
2822}
2823
2824
2825DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2826
2827
2828/**
2829 * Allocates argument registers for a function call.
2830 *
2831 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2832 * need to check the return value.
2833 * @param pReNative The native recompile state.
2834 * @param off The current code buffer offset.
2835 * @param cArgs The number of arguments the function call takes.
2836 */
2837DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2838{
2839 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2840 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2841 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2842 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2843
2844 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2845 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2846 else if (cArgs == 0)
2847 return true;
2848
2849 /*
2850 * Do we get luck and all register are free and not shadowing anything?
2851 */
2852 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2853 for (uint32_t i = 0; i < cArgs; i++)
2854 {
2855 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2856 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2857 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2858 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2859 }
2860 /*
2861 * Okay, not lucky so we have to free up the registers.
2862 */
2863 else
2864 for (uint32_t i = 0; i < cArgs; i++)
2865 {
2866 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2867 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2868 {
2869 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2870 {
2871 case kIemNativeWhat_Var:
2872 {
2873 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2874 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2875 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2876 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2877 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2878
2879 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2880 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2881 else
2882 {
2883 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2884 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2885 }
2886 break;
2887 }
2888
2889 case kIemNativeWhat_Tmp:
2890 case kIemNativeWhat_Arg:
2891 case kIemNativeWhat_rc:
2892 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2893 default:
2894 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2895 }
2896
2897 }
2898 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2899 {
2900 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2901 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2902 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2903 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2904 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2905 }
2906 else
2907 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2908 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2909 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2910 }
2911 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2912 return true;
2913}
2914
2915
2916DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2917
2918
#if 0
/**
 * Releases a host register allocation, regardless of its kind (variable,
 * temporary, argument or return code).
 *
 * Guest register shadowing held by the register is deliberately kept.
 *
 * @param   pReNative       The native recompile state.
 * @param   idxHstReg       The register to free.
 *
 * @note    Does not update variables.
 */
DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
{
    Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));

    uint32_t const fHstRegBit = RT_BIT_32(idxHstReg);
    Assert(pReNative->Core.bmHstRegs & fHstRegBit);
    Assert(!(IEMNATIVE_REG_FIXED_MASK & fHstRegBit));

    /* Must be one of the freeable kinds. */
    Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
           || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
           || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
           || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
    Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
           || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
           || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));

    /* The shadowing bookkeeping must be self-consistent. */
    Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
           == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & fHstRegBit)
           == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));

    pReNative->Core.bmHstRegs &= ~fHstRegBit;
    /* no flushing, right:
    pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    */
}
#endif
2953
2954
2955/**
2956 * Frees a temporary register.
2957 *
2958 * Any shadow copies of guest registers assigned to the host register will not
2959 * be flushed by this operation.
2960 */
2961DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2962{
2963 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2964 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2965 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2966 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2967 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2968}
2969
2970
2971/**
2972 * Frees a temporary immediate register.
2973 *
2974 * It is assumed that the call has not modified the register, so it still hold
2975 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2976 */
2977DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2978{
2979 iemNativeRegFreeTmp(pReNative, idxHstReg);
2980}
2981
2982
2983/**
2984 * Called right before emitting a call instruction to move anything important
2985 * out of call-volatile registers, free and flush the call-volatile registers,
2986 * optionally freeing argument variables.
2987 *
2988 * @returns New code buffer offset, UINT32_MAX on failure.
2989 * @param pReNative The native recompile state.
2990 * @param off The code buffer offset.
2991 * @param cArgs The number of arguments the function call takes.
2992 * It is presumed that the host register part of these have
2993 * been allocated as such already and won't need moving,
2994 * just freeing.
2995 */
2996DECL_HIDDEN_THROW(uint32_t)
2997iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2998{
2999 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
3000
3001 /*
3002 * Move anything important out of volatile registers.
3003 */
3004 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3005 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3006 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
3007#ifdef IEMNATIVE_REG_FIXED_TMP0
3008 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3009#endif
3010 & ~g_afIemNativeCallRegs[cArgs];
3011
3012 fRegsToMove &= pReNative->Core.bmHstRegs;
3013 if (!fRegsToMove)
3014 { /* likely */ }
3015 else
3016 {
3017 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
3018 while (fRegsToMove != 0)
3019 {
3020 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
3021 fRegsToMove &= ~RT_BIT_32(idxReg);
3022
3023 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3024 {
3025 case kIemNativeWhat_Var:
3026 {
3027 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3028 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
3029 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3030 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3031 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
3032 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
3033 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3034 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3035 else
3036 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3037 continue;
3038 }
3039
3040 case kIemNativeWhat_Arg:
3041 AssertMsgFailed(("What?!?: %u\n", idxReg));
3042 continue;
3043
3044 case kIemNativeWhat_rc:
3045 case kIemNativeWhat_Tmp:
3046 AssertMsgFailed(("Missing free: %u\n", idxReg));
3047 continue;
3048
3049 case kIemNativeWhat_FixedTmp:
3050 case kIemNativeWhat_pVCpuFixed:
3051 case kIemNativeWhat_pCtxFixed:
3052 case kIemNativeWhat_FixedReserved:
3053 case kIemNativeWhat_Invalid:
3054 case kIemNativeWhat_End:
3055 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3056 }
3057 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3058 }
3059 }
3060
3061 /*
3062 * Do the actual freeing.
3063 */
3064 if (pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3065 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n", pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3066 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3067
3068 /* If there are guest register shadows in any call-volatile register, we
3069 have to clear the corrsponding guest register masks for each register. */
3070 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3071 if (fHstRegsWithGstShadow)
3072 {
3073 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3074 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
3075 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3076 do
3077 {
3078 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3079 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3080
3081 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
3082 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3083 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3084 } while (fHstRegsWithGstShadow != 0);
3085 }
3086
3087 return off;
3088}
3089
3090
3091/**
3092 * Flushes a set of guest register shadow copies.
3093 *
3094 * This is usually done after calling a threaded function or a C-implementation
3095 * of an instruction.
3096 *
3097 * @param pReNative The native recompile state.
3098 * @param fGstRegs Set of guest registers to flush.
3099 */
3100DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3101{
3102 /*
3103 * Reduce the mask by what's currently shadowed
3104 */
3105 fGstRegs &= pReNative->Core.bmGstRegShadows;
3106 if (fGstRegs)
3107 {
3108 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n",
3109 fGstRegs, pReNative->Core.bmGstRegShadows, pReNative->Core.bmGstRegShadows & ~fGstRegs));
3110 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3111 if (pReNative->Core.bmGstRegShadows)
3112 {
3113 /*
3114 * Partial.
3115 */
3116 do
3117 {
3118 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3119 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3120 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3121 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3122 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3123
3124 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3125 fGstRegs &= ~fInThisHstReg;
3126 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3127 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3128 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3129 } while (fGstRegs != 0);
3130 }
3131 else
3132 {
3133 /*
3134 * Clear all.
3135 */
3136 do
3137 {
3138 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3139 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3140 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3141 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3142 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3143
3144 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3145 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3146 } while (fGstRegs != 0);
3147 pReNative->Core.bmHstRegsWithGstShadow = 0;
3148 }
3149 }
3150}
3151
3152
3153/**
3154 * Flushes any delayed guest register writes.
3155 *
3156 * This must be called prior to calling CImpl functions and any helpers that use
3157 * the guest state (like raising exceptions) and such.
3158 *
3159 * This optimization has not yet been implemented. The first target would be
3160 * RIP updates, since these are the most common ones.
3161 */
3162DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3163{
3164 RT_NOREF(pReNative, off);
3165 return off;
3166}
3167
3168
#ifdef VBOX_STRICT
/**
 * Does internal register allocator sanity checks.
 *
 * Verifies that the host-to-guest shadowing sets (aHstRegs[].fGstRegShadows),
 * the guest-to-host index array (aidxGstRegShadows) and the two overview
 * masks (bmGstRegShadows, bmHstRegsWithGstShadow) all agree with each other.
 *
 * @param   pReNative   The native recompile state.
 */
static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
{
    /*
     * Iterate host registers building a guest shadowing set.
     */
    uint64_t bmGstRegShadows        = 0;
    uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
    AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
    while (bmHstRegsWithGstShadow)
    {
        unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
        Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
        bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);

        /* A register flagged in the overview mask must shadow at least one valid guest register. */
        uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
        AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
        AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
        bmGstRegShadows |= fThisGstRegShadows;
        while (fThisGstRegShadows)
        {
            /* Each shadowed guest register must point back at this host register. */
            unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
            fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
            AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
                      ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
                       idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
        }
    }
    AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
              ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
               bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));

    /*
     * Now the other way around, checking the guest to host index array.
     */
    bmHstRegsWithGstShadow = 0;
    bmGstRegShadows        = pReNative->Core.bmGstRegShadows;
    Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    while (bmGstRegShadows)
    {
        unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
        Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
        bmGstRegShadows &= ~RT_BIT_64(idxGstReg);

        uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
        AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
        AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
                  ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
                   idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
        bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
    }
    /* These are 32-bit masks, so they must be formatted with the 32-bit
       specifier; using a 64-bit one would misread the variable arguments. */
    AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
              ("%RX32 vs %RX32; diff %RX32\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
               bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
}
#endif
3228
3229
3230/*********************************************************************************************************************************
3231* Code Emitters (larger snippets) *
3232*********************************************************************************************************************************/
3233
3234/**
3235 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3236 * extending to 64-bit width.
3237 *
3238 * @returns New code buffer offset on success, UINT32_MAX on failure.
3239 * @param pReNative .
3240 * @param off The current code buffer position.
3241 * @param idxHstReg The host register to load the guest register value into.
3242 * @param enmGstReg The guest register to load.
3243 *
3244 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3245 * that is something the caller needs to do if applicable.
3246 */
3247DECL_HIDDEN_THROW(uint32_t)
3248iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3249{
3250 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3251 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3252
3253 switch (g_aGstShadowInfo[enmGstReg].cb)
3254 {
3255 case sizeof(uint64_t):
3256 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3257 case sizeof(uint32_t):
3258 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3259 case sizeof(uint16_t):
3260 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3261#if 0 /* not present in the table. */
3262 case sizeof(uint8_t):
3263 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3264#endif
3265 default:
3266 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3267 }
3268}
3269
3270
#ifdef VBOX_STRICT
/**
 * Emits code verifying that bits 63:32 of @a idxReg are all zero, i.e. that
 * the register value is UINT32_MAX or less.  A breakpoint instruction is hit
 * if that is not the case.
 *
 * @returns New code buffer offset.
 * @param   pReNative   The native recompile state.
 * @param   off         The current code buffer position.
 * @param   idxReg      The host register to check.
 *
 * @note    May of course trash IEMNATIVE_REG_FIXED_TMP0.
 *          Trashes EFLAGS on AMD64.
 */
static uint32_t
iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
{
# ifdef RT_ARCH_AMD64
    uint8_t * const pbDst     = iemNativeInstrBufEnsure(pReNative, off, 20);
    uint8_t const   bRexWide  = (uint8_t)(X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B));
    uint8_t const   bModRmReg = (uint8_t)X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);

    /* rol reg64, 32  - swaps the two halves, bringing the top bits into the low dword. */
    pbDst[off++] = bRexWide;
    pbDst[off++] = 0xc1;
    pbDst[off++] = bModRmReg;
    pbDst[off++] = 32;

    /* test reg32, ffffffffh */
    if (idxReg >= 8)
        pbDst[off++] = X86_OP_REX_B;
    pbDst[off++] = 0xf7;
    pbDst[off++] = bModRmReg;
    pbDst[off++] = 0xff;
    pbDst[off++] = 0xff;
    pbDst[off++] = 0xff;
    pbDst[off++] = 0xff;

    /* je/jz +1  - skips the breakpoint when all the bits are clear. */
    pbDst[off++] = 0x74;
    pbDst[off++] = 0x01;

    /* int3 */
    pbDst[off++] = 0xcc;

    /* rol reg64, 32  - swaps the halves back, restoring the register value. */
    pbDst[off++] = bRexWide;
    pbDst[off++] = 0xc1;
    pbDst[off++] = bModRmReg;
    pbDst[off++] = 32;

# elif defined(RT_ARCH_ARM64)
    uint32_t * const pu32Dst = iemNativeInstrBufEnsure(pReNative, off, 3);
    /* lsr tmp0, reg64, #32 */
    pu32Dst[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
    /* cbz tmp0, +1  - skips the breakpoint when the top bits are clear. */
    pu32Dst[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    /* brk #0x1100 */
    pu32Dst[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));

# else
#  error "Port me!"
# endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#endif /* VBOX_STRICT */
3329
3330
#ifdef VBOX_STRICT
/**
 * Emitting code that checks that the content of register @a idxReg is the same
 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
 * instruction if that's not the case.
 *
 * On AMD64 the bits of @a idxReg above the guest register width are checked
 * for being zero as well.
 *
 * @returns New code buffer offset; throws VBox status code on error.
 * @param   pReNative   The native recompile state.
 * @param   off         The current code buffer position.
 * @param   idxReg      The host register holding the value to verify.
 * @param   enmGstReg   The guest register to compare against.
 *
 * @note    May of course trash IEMNATIVE_REG_FIXED_TMP0.
 *          Trashes EFLAGS on AMD64.
 */
static uint32_t
iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
{
# ifdef RT_ARCH_AMD64
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);

    /* cmp reg, [mem] */
    if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
    {
        if (idxReg >= 8)
            pbCodeBuf[off++] = X86_OP_REX_R;
        pbCodeBuf[off++] = 0x38;
    }
    else
    {
        if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
            pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
        else
        {
            if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
                pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            else
                AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
                           IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
            if (idxReg >= 8)
                pbCodeBuf[off++] = X86_OP_REX_R;
        }
        pbCodeBuf[off++] = 0x39;
    }
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);

    /* je/jz +1 */
    pbCodeBuf[off++] = 0x74;
    pbCodeBuf[off++] = 0x01;

    /* int3 */
    pbCodeBuf[off++] = 0xcc;

    /* For values smaller than the register size, we must check that the rest
       of the register is all zeros. */
    if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
    {
        /* test reg64, imm32  - the immediate is sign extended to 64 bits, so
           this covers everything above the low byte / word. */
        pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
        pbCodeBuf[off++] = 0xf7;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
        pbCodeBuf[off++] = 0;
        pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
        pbCodeBuf[off++] = 0xff;
        pbCodeBuf[off++] = 0xff;

        /* je/jz +1 */
        pbCodeBuf[off++] = 0x74;
        pbCodeBuf[off++] = 0x01;

        /* int3 */
        pbCodeBuf[off++] = 0xcc;
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    }
    else
    {
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
        /* The returned offset must be picked up here, otherwise the check is
           overwritten by whatever gets emitted next.  pbCodeBuf is not used
           after this point since the callee ensures buffer space itself. */
        if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
            off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
    }

# elif defined(RT_ARCH_ARM64)
    /* mov TMP0, [gstreg] */
    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);

    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    /* sub tmp0, tmp0, idxReg */
    pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
    /* cbz tmp0, +1 */
    pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    /* brk #0x1000+enmGstReg */
    pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

# else
#  error "Port me!"
# endif
    return off;
}
#endif /* VBOX_STRICT */
3425
3426
3427/**
3428 * Emits a code for checking the return code of a call and rcPassUp, returning
3429 * from the code if either are non-zero.
3430 */
3431DECL_HIDDEN_THROW(uint32_t)
3432iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3433{
3434#ifdef RT_ARCH_AMD64
3435 /*
3436 * AMD64: eax = call status code.
3437 */
3438
3439 /* edx = rcPassUp */
3440 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3441# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3442 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3443# endif
3444
3445 /* edx = eax | rcPassUp */
3446 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3447 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3448 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3449 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3450
3451 /* Jump to non-zero status return path. */
3452 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3453
3454 /* done. */
3455
3456#elif RT_ARCH_ARM64
3457 /*
3458 * ARM64: w0 = call status code.
3459 */
3460 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3461 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3462
3463 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3464
3465 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3466
3467 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3468 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3469 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3470
3471#else
3472# error "port me"
3473#endif
3474 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3475 return off;
3476}
3477
3478
3479/**
3480 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3481 * raising a \#GP(0) if it isn't.
3482 *
3483 * @returns New code buffer offset, UINT32_MAX on failure.
3484 * @param pReNative The native recompile state.
3485 * @param off The code buffer offset.
3486 * @param idxAddrReg The host register with the address to check.
3487 * @param idxInstr The current instruction.
3488 */
3489DECL_HIDDEN_THROW(uint32_t)
3490iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3491{
3492 RT_NOREF(idxInstr);
3493
3494 /*
3495 * Make sure we don't have any outstanding guest register writes as we may
3496 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
3497 */
3498 off = iemNativeRegFlushPendingWrites(pReNative, off);
3499
3500#ifdef RT_ARCH_AMD64
3501 /*
3502 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3503 * return raisexcpt();
3504 * ---- this wariant avoid loading a 64-bit immediate, but is an instruction longer.
3505 */
3506 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3507
3508 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3509 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3510 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3511 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3512
3513# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3514 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3515# else
3516 uint32_t const offFixup = off;
3517 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3518 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3519 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3520 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3521# endif
3522
3523 iemNativeRegFreeTmp(pReNative, iTmpReg);
3524
3525#elif defined(RT_ARCH_ARM64)
3526 /*
3527 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3528 * return raisexcpt();
3529 * ----
3530 * mov x1, 0x800000000000
3531 * add x1, x0, x1
3532 * cmp xzr, x1, lsr 48
3533 * and either:
3534 * b.ne .Lraisexcpt
3535 * or:
3536 * b.eq .Lnoexcept
3537 * movz x1, #instruction-number
3538 * b .Lraisexcpt
3539 * .Lnoexcept:
3540 */
3541 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3542
3543 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3544 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3545 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3546
3547# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3548 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3549# else
3550 uint32_t const offFixup = off;
3551 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3552 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3553 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3554 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3555# endif
3556
3557 iemNativeRegFreeTmp(pReNative, iTmpReg);
3558
3559#else
3560# error "Port me"
3561#endif
3562 return off;
3563}
3564
3565
3566/**
3567 * Emits code to check if the content of @a idxAddrReg is within the limit of
3568 * idxSegReg, raising a \#GP(0) if it isn't.
3569 *
3570 * @returns New code buffer offset; throws VBox status code on error.
3571 * @param pReNative The native recompile state.
3572 * @param off The code buffer offset.
3573 * @param idxAddrReg The host register (32-bit) with the address to
3574 * check.
3575 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3576 * against.
3577 * @param idxInstr The current instruction.
3578 */
3579DECL_HIDDEN_THROW(uint32_t)
3580iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3581 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3582{
3583 /*
3584 * Make sure we don't have any outstanding guest register writes as we may
3585 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
3586 */
3587 off = iemNativeRegFlushPendingWrites(pReNative, off);
3588
3589 /** @todo implement expand down/whatnot checking */
3590 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3591
3592 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3593 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3594 kIemNativeGstRegUse_ForUpdate);
3595
3596 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3597
3598#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3599 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3600 RT_NOREF(idxInstr);
3601#else
3602 uint32_t const offFixup = off;
3603 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3604 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3605 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3606 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3607#endif
3608
3609 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3610 return off;
3611}
3612
3613
3614/**
3615 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
3616 *
3617 * @returns The flush mask.
3618 * @param fCImpl The IEM_CIMPL_F_XXX flags.
3619 * @param fGstShwFlush The starting flush mask.
3620 */
3621DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
3622{
3623 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
3624 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
3625 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
3626 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
3627 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
3628 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
3629 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
3630 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
3631 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
3632 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
3633 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
3634 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
3635 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3636 return fGstShwFlush;
3637}
3638
3639
3640/**
3641 * Emits a call to a CImpl function or something similar.
3642 */
3643static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
3644 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3645 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3646{
3647 /*
3648 * Flush stuff. PC and EFlags are implictly flushed, the latter because we
3649 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
3650 */
3651 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
3652 fGstShwFlush
3653 | RT_BIT_64(kIemNativeGstReg_Pc)
3654 | RT_BIT_64(kIemNativeGstReg_EFlags));
3655 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3656
3657 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3658
3659 /*
3660 * Load the parameters.
3661 */
3662#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3663 /* Special code the hidden VBOXSTRICTRC pointer. */
3664 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3665 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3666 if (cAddParams > 0)
3667 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3668 if (cAddParams > 1)
3669 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3670 if (cAddParams > 2)
3671 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3672 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3673
3674#else
3675 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3676 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3677 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3678 if (cAddParams > 0)
3679 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3680 if (cAddParams > 1)
3681 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3682 if (cAddParams > 2)
3683# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3684 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3685# else
3686 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3687# endif
3688#endif
3689
3690 /*
3691 * Make the call.
3692 */
3693 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3694
3695#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3696 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3697#endif
3698
3699 /*
3700 * Check the status code.
3701 */
3702 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3703}
3704
3705
3706/**
3707 * Emits a call to a threaded worker function.
3708 */
3709static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3710{
3711 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3712 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3713 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3714
3715#ifdef RT_ARCH_AMD64
3716 /* Load the parameters and emit the call. */
3717# ifdef RT_OS_WINDOWS
3718# ifndef VBOXSTRICTRC_STRICT_ENABLED
3719 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3720 if (cParams > 0)
3721 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3722 if (cParams > 1)
3723 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3724 if (cParams > 2)
3725 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3726# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3727 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3728 if (cParams > 0)
3729 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3730 if (cParams > 1)
3731 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3732 if (cParams > 2)
3733 {
3734 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3735 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3736 }
3737 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3738# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3739# else
3740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3741 if (cParams > 0)
3742 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3743 if (cParams > 1)
3744 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3745 if (cParams > 2)
3746 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3747# endif
3748
3749 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3750
3751# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3752 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3753# endif
3754
3755#elif RT_ARCH_ARM64
3756 /*
3757 * ARM64:
3758 */
3759 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3760 if (cParams > 0)
3761 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3762 if (cParams > 1)
3763 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3764 if (cParams > 2)
3765 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3766
3767 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3768
3769#else
3770# error "port me"
3771#endif
3772
3773 /*
3774 * Check the status code.
3775 */
3776 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3777
3778 return off;
3779}
3780
3781
3782/**
3783 * Emits the code at the RaiseGP0 label.
3784 */
3785static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3786{
3787 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3788 if (idxLabel != UINT32_MAX)
3789 {
3790 iemNativeLabelDefine(pReNative, idxLabel, off);
3791
3792 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3793 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3794#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3795 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3796#endif
3797 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3798
3799 /* jump back to the return sequence. */
3800 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3801 }
3802 return off;
3803}
3804
3805
3806/**
3807 * Emits the code at the ReturnWithFlags label (returns
3808 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3809 */
3810static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3811{
3812 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3813 if (idxLabel != UINT32_MAX)
3814 {
3815 iemNativeLabelDefine(pReNative, idxLabel, off);
3816
3817 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3818
3819 /* jump back to the return sequence. */
3820 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3821 }
3822 return off;
3823}
3824
3825
3826/**
3827 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3828 */
3829static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3830{
3831 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3832 if (idxLabel != UINT32_MAX)
3833 {
3834 iemNativeLabelDefine(pReNative, idxLabel, off);
3835
3836 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3837
3838 /* jump back to the return sequence. */
3839 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3840 }
3841 return off;
3842}
3843
3844
3845/**
3846 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3847 */
3848static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3849{
3850 /*
3851 * Generate the rc + rcPassUp fiddling code if needed.
3852 */
3853 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3854 if (idxLabel != UINT32_MAX)
3855 {
3856 iemNativeLabelDefine(pReNative, idxLabel, off);
3857
3858 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3859#ifdef RT_ARCH_AMD64
3860# ifdef RT_OS_WINDOWS
3861# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3862 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3863# endif
3864 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3865 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3866# else
3867 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3868 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3869# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3870 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3871# endif
3872# endif
3873# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3874 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3875# endif
3876
3877#else
3878 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3879 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3880 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3881#endif
3882
3883 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3884 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3885 }
3886 return off;
3887}
3888
3889
3890/**
3891 * Emits a standard epilog.
3892 */
3893static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3894{
3895 *pidxReturnLabel = UINT32_MAX;
3896
3897 /*
3898 * Successful return, so clear the return register (eax, w0).
3899 */
3900 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
3901
3902 /*
3903 * Define label for common return point.
3904 */
3905 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3906 *pidxReturnLabel = idxReturn;
3907
3908 /*
3909 * Restore registers and return.
3910 */
3911#ifdef RT_ARCH_AMD64
3912 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3913
3914 /* Reposition esp at the r15 restore point. */
3915 pbCodeBuf[off++] = X86_OP_REX_W;
3916 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3917 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3918 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3919
3920 /* Pop non-volatile registers and return */
3921 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3922 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3923 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3924 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3925 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3926 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3927 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3928 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3929# ifdef RT_OS_WINDOWS
3930 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3931 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3932# endif
3933 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3934 pbCodeBuf[off++] = 0xc9; /* leave */
3935 pbCodeBuf[off++] = 0xc3; /* ret */
3936 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3937
3938#elif RT_ARCH_ARM64
3939 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3940
3941 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3942 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3943 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3944 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3945 IEMNATIVE_FRAME_VAR_SIZE / 8);
3946 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3947 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3948 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3949 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3950 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3951 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3952 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3953 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3954 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3955 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3956 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3957 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3958
3959 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3960 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3961 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3962 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3963
3964 /* retab / ret */
3965# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3966 if (1)
3967 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3968 else
3969# endif
3970 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3971
3972#else
3973# error "port me"
3974#endif
3975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3976
3977 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3978}
3979
3980
3981/**
3982 * Emits a standard prolog.
3983 */
3984static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3985{
3986#ifdef RT_ARCH_AMD64
3987 /*
3988 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3989 * reserving 64 bytes for stack variables plus 4 non-register argument
3990 * slots. Fixed register assignment: xBX = pReNative;
3991 *
3992 * Since we always do the same register spilling, we can use the same
3993 * unwind description for all the code.
3994 */
3995 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3996 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3997 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3998 pbCodeBuf[off++] = 0x8b;
3999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
4000 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
4001 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
4002# ifdef RT_OS_WINDOWS
4003 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
4004 pbCodeBuf[off++] = 0x8b;
4005 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
4006 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
4007 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
4008# else
4009 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
4010 pbCodeBuf[off++] = 0x8b;
4011 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
4012# endif
4013 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
4014 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
4015 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
4016 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
4017 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
4018 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
4019 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
4020 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
4021
4022 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
4023 X86_GREG_xSP,
4024 IEMNATIVE_FRAME_ALIGN_SIZE
4025 + IEMNATIVE_FRAME_VAR_SIZE
4026 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
4027 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
4028 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
4029 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
4030 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
4031
4032#elif RT_ARCH_ARM64
4033 /*
4034 * We set up a stack frame exactly like on x86, only we have to push the
4035 * return address our selves here. We save all non-volatile registers.
4036 */
4037 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4038
4039# ifdef RT_OS_DARWIN /** @todo This seems to be requirement by libunwind for JIT FDEs. Investigate further as been unable
4040 * to figure out where the BRK following AUTHB*+XPACB* stuff comes from in libunwind. It's
4041 * definitely the dwarf stepping code, but till found it's very tedious to figure out whether it's
4042 * in any way conditional, so just emitting this instructions now and hoping for the best... */
4043 /* pacibsp */
4044 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
4045# endif
4046
4047 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
4048 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
4049 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4050 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4051 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
4052 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
4053 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4054 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4055 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4056 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4057 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4058 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4059 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4060 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4061 /* Save the BP and LR (ret address) registers at the top of the frame. */
4062 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4063 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4064 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4065 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
4066 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
4067 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
4068
4069 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
4070 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
4071
4072 /* mov r28, r0 */
4073 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
4074 /* mov r27, r1 */
4075 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
4076
4077#else
4078# error "port me"
4079#endif
4080 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4081 return off;
4082}
4083
4084
4085
4086
4087/*********************************************************************************************************************************
4088* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
4089*********************************************************************************************************************************/
4090
/**
 * Opens a recompiled microcode block.
 *
 * Starts a scope (closed again by IEM_MC_END), checks that no variables,
 * argument variables or stack slots are left over from a previous block,
 * and records the MC flags, the CIMPL flags and the total argument count
 * (explicit arguments plus the hidden ones).  The a_cLocals argument is not
 * used by this expansion.
 */
#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
    { \
        Assert(pReNative->Core.bmVars     == 0); \
        Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
        Assert(pReNative->Core.bmStack    == 0); \
        pReNative->fMc    = (a_fMcFlags); \
        pReNative->fCImpl = (a_fCImplFlags); \
        pReNative->cArgs  = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))

/**
 * Closes a recompiled microcode block: frees all variables, ends the scope
 * opened by IEM_MC_BEGIN and returns the current offset.
 *
 * We have to get to the end in recompilation mode, as otherwise we won't
 * generate code for all the IEM_MC_IF_XXX branches.
 */
#define IEM_MC_END() \
        iemNativeVarFreeAll(pReNative); \
    } return off
4105
4106
4107
4108/*********************************************************************************************************************************
4109* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
4110*********************************************************************************************************************************/
4111
/**
 * Defers the whole instruction to a C implementation taking no extra
 * arguments: clears the MC flags, records the CIMPL flags and returns the
 * offset produced by iemNativeEmitCImplCall0.
 */
#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
    pReNative->fMc    = 0; \
    pReNative->fCImpl = (a_fFlags); \
    return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
4116
4117
4118#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4119 pReNative->fMc = 0; \
4120 pReNative->fCImpl = (a_fFlags); \
4121 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
4122
4123DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4124 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4125 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
4126{
4127 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
4128}
4129
4130
4131#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4132 pReNative->fMc = 0; \
4133 pReNative->fCImpl = (a_fFlags); \
4134 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4135 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
4136
4137DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4138 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4139 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
4140{
4141 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
4142}
4143
4144
4145#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4146 pReNative->fMc = 0; \
4147 pReNative->fCImpl = (a_fFlags); \
4148 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4149 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
4150
4151DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4152 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4153 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
4154 uint64_t uArg2)
4155{
4156 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
4157}
4158
4159
4160
4161/*********************************************************************************************************************************
4162* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
4163*********************************************************************************************************************************/
4164
4165/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
4166 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
4167DECL_INLINE_THROW(uint32_t)
4168iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4169{
4170 /*
4171 * If its not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
4172 * return with special status code and make the execution loop deal with
4173 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
4174 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
4175 * could continue w/o interruption, it probably will drop into the
4176 * debugger, so not worth the effort of trying to services it here and we
4177 * just lump it in with the handling of the others.
4178 *
4179 * To simplify the code and the register state management even more (wrt
4180 * immediate in AND operation), we always update the flags and skip the
4181 * extra check associated conditional jump.
4182 */
4183 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
4184 <= UINT32_MAX);
4185 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4186 kIemNativeGstRegUse_ForUpdate);
4187 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
4188 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
4189 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
4190 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
4191 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4192
4193 /* Free but don't flush the EFLAGS register. */
4194 iemNativeRegFreeTmp(pReNative, idxEflReg);
4195
4196 return off;
4197}
4198
4199
4200#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4201 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4202
4203#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4204 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4205 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4206
4207/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4208DECL_INLINE_THROW(uint32_t)
4209iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4210{
4211 /* Allocate a temporary PC register. */
4212 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4213
4214 /* Perform the addition and store the result. */
4215 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4216 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4217
4218 /* Free but don't flush the PC register. */
4219 iemNativeRegFreeTmp(pReNative, idxPcReg);
4220
4221 return off;
4222}
4223
4224
4225#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4226 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4227
4228#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4229 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4230 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4231
4232/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4233DECL_INLINE_THROW(uint32_t)
4234iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4235{
4236 /* Allocate a temporary PC register. */
4237 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4238
4239 /* Perform the addition and store the result. */
4240 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4241 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4242
4243 /* Free but don't flush the PC register. */
4244 iemNativeRegFreeTmp(pReNative, idxPcReg);
4245
4246 return off;
4247}
4248
4249
4250#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4251 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4252
4253#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4254 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4256
4257/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4258DECL_INLINE_THROW(uint32_t)
4259iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4260{
4261 /* Allocate a temporary PC register. */
4262 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4263
4264 /* Perform the addition and store the result. */
4265 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4266 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4267 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4268
4269 /* Free but don't flush the PC register. */
4270 iemNativeRegFreeTmp(pReNative, idxPcReg);
4271
4272 return off;
4273}
4274
4275
4276
4277/*********************************************************************************************************************************
4278* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4279*********************************************************************************************************************************/
4280
4281#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4282 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4283 (a_enmEffOpSize), pCallEntry->idxInstr)
4284
4285#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4286 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4287 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4288
4289#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4290 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4291 IEMMODE_16BIT, pCallEntry->idxInstr)
4292
4293#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4294 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4295 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4296
4297#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4298 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4299 IEMMODE_64BIT, pCallEntry->idxInstr)
4300
4301#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4302 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4303 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4304
4305/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4306 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4307 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4308DECL_INLINE_THROW(uint32_t)
4309iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4310 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4311{
4312 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4313
4314 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4315 off = iemNativeRegFlushPendingWrites(pReNative, off);
4316
4317 /* Allocate a temporary PC register. */
4318 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4319
4320 /* Perform the addition. */
4321 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4322
4323 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4324 {
4325 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4326 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4327 }
4328 else
4329 {
4330 /* Just truncate the result to 16-bit IP. */
4331 Assert(enmEffOpSize == IEMMODE_16BIT);
4332 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4333 }
4334 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4335
4336 /* Free but don't flush the PC register. */
4337 iemNativeRegFreeTmp(pReNative, idxPcReg);
4338
4339 return off;
4340}
4341
4342
4343#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4344 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4345 (a_enmEffOpSize), pCallEntry->idxInstr)
4346
4347#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4348 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4349 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4350
4351#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4352 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4353 IEMMODE_16BIT, pCallEntry->idxInstr)
4354
4355#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4356 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4357 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4358
4359#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4360 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4361 IEMMODE_32BIT, pCallEntry->idxInstr)
4362
4363#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4364 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4365 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4366
4367/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4368 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4369 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4370DECL_INLINE_THROW(uint32_t)
4371iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4372 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4373{
4374 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4375
4376 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4377 off = iemNativeRegFlushPendingWrites(pReNative, off);
4378
4379 /* Allocate a temporary PC register. */
4380 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4381
4382 /* Perform the addition. */
4383 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4384
4385 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4386 if (enmEffOpSize == IEMMODE_16BIT)
4387 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4388
4389 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4390 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4391
4392 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4393
4394 /* Free but don't flush the PC register. */
4395 iemNativeRegFreeTmp(pReNative, idxPcReg);
4396
4397 return off;
4398}
4399
4400
4401#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4402 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4403
4404#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4405 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4406 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4407
4408#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4409 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4410
4411#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4412 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4413 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4414
4415#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4416 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4417
4418#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4419 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4420 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4421
4422/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4423DECL_INLINE_THROW(uint32_t)
4424iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4425 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4426{
4427 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4428 off = iemNativeRegFlushPendingWrites(pReNative, off);
4429
4430 /* Allocate a temporary PC register. */
4431 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4432
4433 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4434 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4435 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4436 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4437 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4438
4439 /* Free but don't flush the PC register. */
4440 iemNativeRegFreeTmp(pReNative, idxPcReg);
4441
4442 return off;
4443}
4444
4445
4446
4447/*********************************************************************************************************************************
4448* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4449*********************************************************************************************************************************/
4450
4451/**
4452 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4453 *
4454 * @returns Pointer to the condition stack entry on success, NULL on failure
4455 * (too many nestings)
4456 */
4457DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4458{
4459 uint32_t const idxStack = pReNative->cCondDepth;
4460 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4461
4462 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4463 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4464
4465 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4466 pEntry->fInElse = false;
4467 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4468 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4469
4470 return pEntry;
4471}
4472
4473
4474/**
4475 * Start of the if-block, snapshotting the register and variable state.
4476 */
4477DECL_INLINE_THROW(void)
4478iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4479{
4480 Assert(offIfBlock != UINT32_MAX);
4481 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4482 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4483 Assert(!pEntry->fInElse);
4484
4485 /* Define the start of the IF block if request or for disassembly purposes. */
4486 if (idxLabelIf != UINT32_MAX)
4487 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4488#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4489 else
4490 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4491#else
4492 RT_NOREF(offIfBlock);
4493#endif
4494
4495 /* Copy the initial state so we can restore it in the 'else' block. */
4496 pEntry->InitialState = pReNative->Core;
4497}
4498
4499
4500#define IEM_MC_ELSE() } while (0); \
4501 off = iemNativeEmitElse(pReNative, off); \
4502 do {
4503
4504/** Emits code related to IEM_MC_ELSE. */
4505DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4506{
4507 /* Check sanity and get the conditional stack entry. */
4508 Assert(off != UINT32_MAX);
4509 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4510 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4511 Assert(!pEntry->fInElse);
4512
4513 /* Jump to the endif */
4514 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4515
4516 /* Define the else label and enter the else part of the condition. */
4517 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4518 pEntry->fInElse = true;
4519
4520 /* Snapshot the core state so we can do a merge at the endif and restore
4521 the snapshot we took at the start of the if-block. */
4522 pEntry->IfFinalState = pReNative->Core;
4523 pReNative->Core = pEntry->InitialState;
4524
4525 return off;
4526}
4527
4528
4529#define IEM_MC_ENDIF() } while (0); \
4530 off = iemNativeEmitEndIf(pReNative, off)
4531
4532/** Emits code related to IEM_MC_ENDIF. */
4533DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4534{
4535 /* Check sanity and get the conditional stack entry. */
4536 Assert(off != UINT32_MAX);
4537 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4538 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4539
4540 /*
4541 * Now we have find common group with the core state at the end of the
4542 * if-final. Use the smallest common denominator and just drop anything
4543 * that isn't the same in both states.
4544 */
4545 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4546 * which is why we're doing this at the end of the else-block.
4547 * But we'd need more info about future for that to be worth the effort. */
4548 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4549 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4550 {
4551 /* shadow guest stuff first. */
4552 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4553 if (fGstRegs)
4554 {
4555 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4556 do
4557 {
4558 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4559 fGstRegs &= ~RT_BIT_64(idxGstReg);
4560
4561 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4562 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4563 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4564 {
4565 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
4566 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4567 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4568 }
4569 } while (fGstRegs);
4570 }
4571 else
4572 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4573
4574 /* Check variables next. For now we must require them to be identical
4575 or stuff we can recreate. */
4576 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4577 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4578 if (fVars)
4579 {
4580 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4581 do
4582 {
4583 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4584 fVars &= ~RT_BIT_32(idxVar);
4585
4586 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4587 {
4588 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4589 continue;
4590 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4591 {
4592 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4593 if (idxHstReg != UINT8_MAX)
4594 {
4595 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4596 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4597 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4598 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4599 }
4600 continue;
4601 }
4602 }
4603 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4604 continue;
4605
4606 /* Irreconcilable, so drop it. */
4607 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4608 if (idxHstReg != UINT8_MAX)
4609 {
4610 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4611 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4612 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4613 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4614 }
4615 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4616 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4617 } while (fVars);
4618 }
4619
4620 /* Finally, check that the host register allocations matches. */
4621 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4622 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4623 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4624 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4625 }
4626
4627 /*
4628 * Define the endif label and maybe the else one if we're still in the 'if' part.
4629 */
4630 if (!pEntry->fInElse)
4631 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4632 else
4633 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4634 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4635
4636 /* Pop the conditional stack.*/
4637 pReNative->cCondDepth -= 1;
4638
4639 return off;
4640}
4641
4642
4643#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4644 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4645 do {
4646
4647/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4648DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4649{
4650 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4651
4652 /* Get the eflags. */
4653 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4654 kIemNativeGstRegUse_ReadOnly);
4655
4656 /* Test and jump. */
4657 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4658
4659 /* Free but don't flush the EFlags register. */
4660 iemNativeRegFreeTmp(pReNative, idxEflReg);
4661
4662 /* Make a copy of the core state now as we start the if-block. */
4663 iemNativeCondStartIfBlock(pReNative, off);
4664
4665 return off;
4666}
4667
4668
4669#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4670 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4671 do {
4672
4673/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4674DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4675{
4676 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4677
4678 /* Get the eflags. */
4679 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4680 kIemNativeGstRegUse_ReadOnly);
4681
4682 /* Test and jump. */
4683 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4684
4685 /* Free but don't flush the EFlags register. */
4686 iemNativeRegFreeTmp(pReNative, idxEflReg);
4687
4688 /* Make a copy of the core state now as we start the if-block. */
4689 iemNativeCondStartIfBlock(pReNative, off);
4690
4691 return off;
4692}
4693
4694
4695#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4696 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4697 do {
4698
4699/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4700DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4701{
4702 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4703
4704 /* Get the eflags. */
4705 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4706 kIemNativeGstRegUse_ReadOnly);
4707
4708 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4709 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4710
4711 /* Test and jump. */
4712 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4713
4714 /* Free but don't flush the EFlags register. */
4715 iemNativeRegFreeTmp(pReNative, idxEflReg);
4716
4717 /* Make a copy of the core state now as we start the if-block. */
4718 iemNativeCondStartIfBlock(pReNative, off);
4719
4720 return off;
4721}
4722
4723
4724#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4725 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4726 do {
4727
4728/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4729DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4730{
4731 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4732
4733 /* Get the eflags. */
4734 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4735 kIemNativeGstRegUse_ReadOnly);
4736
4737 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4738 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4739
4740 /* Test and jump. */
4741 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4742
4743 /* Free but don't flush the EFlags register. */
4744 iemNativeRegFreeTmp(pReNative, idxEflReg);
4745
4746 /* Make a copy of the core state now as we start the if-block. */
4747 iemNativeCondStartIfBlock(pReNative, off);
4748
4749 return off;
4750}
4751
4752
4753#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4754 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4755 do {
4756
4757#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4758 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4759 do {
4760
4761/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4762DECL_INLINE_THROW(uint32_t)
4763iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4764 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4765{
4766 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4767
4768 /* Get the eflags. */
4769 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4770 kIemNativeGstRegUse_ReadOnly);
4771
4772 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4773 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4774
4775 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4776 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4777 Assert(iBitNo1 != iBitNo2);
4778
4779#ifdef RT_ARCH_AMD64
4780 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4781
4782 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4783 if (iBitNo1 > iBitNo2)
4784 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4785 else
4786 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4787 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4788
4789#elif defined(RT_ARCH_ARM64)
4790 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4791 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4792
4793 /* and tmpreg, eflreg, #1<<iBitNo1 */
4794 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4795
4796 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4797 if (iBitNo1 > iBitNo2)
4798 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4799 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4800 else
4801 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4802 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4803
4804 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4805
4806#else
4807# error "Port me"
4808#endif
4809
4810 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4811 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4812 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4813
4814 /* Free but don't flush the EFlags and tmp registers. */
4815 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4816 iemNativeRegFreeTmp(pReNative, idxEflReg);
4817
4818 /* Make a copy of the core state now as we start the if-block. */
4819 iemNativeCondStartIfBlock(pReNative, off);
4820
4821 return off;
4822}
4823
4824
4825#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4826 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4827 do {
4828
4829#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4830 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4831 do {
4832
4833/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4834 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4835DECL_INLINE_THROW(uint32_t)
4836iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4837 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4838{
4839 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4840
4841 /* We need an if-block label for the non-inverted variant. */
4842 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4843 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4844
4845 /* Get the eflags. */
4846 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4847 kIemNativeGstRegUse_ReadOnly);
4848
4849 /* Translate the flag masks to bit numbers. */
4850 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4851 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4852
4853 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4854 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4855 Assert(iBitNo1 != iBitNo);
4856
4857 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4858 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4859 Assert(iBitNo2 != iBitNo);
4860 Assert(iBitNo2 != iBitNo1);
4861
4862#ifdef RT_ARCH_AMD64
4863 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4864#elif defined(RT_ARCH_ARM64)
4865 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4866#endif
4867
4868 /* Check for the lone bit first. */
4869 if (!fInverted)
4870 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4871 else
4872 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4873
4874 /* Then extract and compare the other two bits. */
4875#ifdef RT_ARCH_AMD64
4876 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4877 if (iBitNo1 > iBitNo2)
4878 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4879 else
4880 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4881 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4882
4883#elif defined(RT_ARCH_ARM64)
4884 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4885
4886 /* and tmpreg, eflreg, #1<<iBitNo1 */
4887 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4888
4889 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4890 if (iBitNo1 > iBitNo2)
4891 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4892 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4893 else
4894 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4895 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4896
4897 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4898
4899#else
4900# error "Port me"
4901#endif
4902
4903 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4904 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4905 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4906
4907 /* Free but don't flush the EFlags and tmp registers. */
4908 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4909 iemNativeRegFreeTmp(pReNative, idxEflReg);
4910
4911 /* Make a copy of the core state now as we start the if-block. */
4912 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4913
4914 return off;
4915}
4916
4917
4918#define IEM_MC_IF_CX_IS_NZ() \
4919 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4920 do {
4921
4922/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4923DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4924{
4925 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4926
4927 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
4928 kIemNativeGstRegUse_ReadOnly);
4929 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4930 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4931
4932 iemNativeCondStartIfBlock(pReNative, off);
4933 return off;
4934}
4935
4936
4937#define IEM_MC_IF_ECX_IS_NZ() \
4938 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4939 do {
4940
4941#define IEM_MC_IF_RCX_IS_NZ() \
4942 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4943 do {
4944
4945/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4946DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4947{
4948 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4949
4950 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
4951 kIemNativeGstRegUse_ReadOnly);
4952 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4953 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4954
4955 iemNativeCondStartIfBlock(pReNative, off);
4956 return off;
4957}
4958
4959
4960#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4961 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4962 do {
4963
4964#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4965 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4966 do {
4967
4968/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4969DECL_INLINE_THROW(uint32_t)
4970iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4971{
4972 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4973
4974 /* We have to load both RCX and EFLAGS before we can start branching,
4975 otherwise we'll end up in the else-block with an inconsistent
4976 register allocator state.
4977 Doing EFLAGS first as it's more likely to be loaded, right? */
4978 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4979 kIemNativeGstRegUse_ReadOnly);
4980 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
4981 kIemNativeGstRegUse_ReadOnly);
4982
4983 /** @todo we could reduce this to a single branch instruction by spending a
4984 * temporary register and some setnz stuff. Not sure if loops are
4985 * worth it. */
4986 /* Check CX. */
4987 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4988
4989 /* Check the EFlags bit. */
4990 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4991 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4992 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4993 !fCheckIfSet /*fJmpIfSet*/);
4994
4995 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4996 iemNativeRegFreeTmp(pReNative, idxEflReg);
4997
4998 iemNativeCondStartIfBlock(pReNative, off);
4999 return off;
5000}
5001
5002
5003#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5004 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
5005 do {
5006
5007#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5008 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
5009 do {
5010
5011#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5012 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
5013 do {
5014
5015#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5016 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
5017 do {
5018
5019/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
5020 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
5021 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
5022 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
5023DECL_INLINE_THROW(uint32_t)
5024iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5025 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
5026{
5027 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5028
5029 /* We have to load both RCX and EFLAGS before we can start branching,
5030 otherwise we'll end up in the else-block with an inconsistent
5031 register allocator state.
5032 Doing EFLAGS first as it's more likely to be loaded, right? */
5033 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5034 kIemNativeGstRegUse_ReadOnly);
5035 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5036 kIemNativeGstRegUse_ReadOnly);
5037
5038 /** @todo we could reduce this to a single branch instruction by spending a
5039 * temporary register and some setnz stuff. Not sure if loops are
5040 * worth it. */
5041 /* Check RCX/ECX. */
5042 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5043
5044 /* Check the EFlags bit. */
5045 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5046 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5047 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5048 !fCheckIfSet /*fJmpIfSet*/);
5049
5050 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5051 iemNativeRegFreeTmp(pReNative, idxEflReg);
5052
5053 iemNativeCondStartIfBlock(pReNative, off);
5054 return off;
5055}
5056
5057
5058
5059/*********************************************************************************************************************************
5060* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
5061*********************************************************************************************************************************/
/** Number of hidden arguments for CIMPL calls.
 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
# define IEM_CIMPL_HIDDEN_ARGS 3
#else
# define IEM_CIMPL_HIDDEN_ARGS 2
#endif

/** Declares @a a_Name as the variable index of call argument @a a_iArg; the
 *  variable size is sizeof(a_Type). */
#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
    uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))

/** Same as IEM_MC_ARG, but the argument has the constant value @a a_Value. */
#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
    uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))

/** Declares @a a_Name as the variable index of call argument @a a_iArg, which
 *  references the local variable @a a_Local.
 *  @note The expansion has always used @a a_Local, so it has to be a macro
 *        parameter; without it the macro cannot expand to valid code. */
#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
    uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))

/** Declares @a a_Name as the variable index of a local of size sizeof(a_Type). */
#define IEM_MC_LOCAL(a_Type, a_Name) \
    uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))

/** Same as IEM_MC_LOCAL, but the local has the constant value @a a_Value. */
#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
    uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
5085
5086/**
5087 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
5088 */
5089DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
5090{
5091 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
5092 return IEM_CIMPL_HIDDEN_ARGS;
5093 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
5094 return 1;
5095 return 0;
5096}
5097
5098
5099/**
5100 * Internal work that allocates a variable with kind set to
5101 * kIemNativeVarKind_Invalid and no current stack allocation.
5102 *
5103 * The kind will either be set by the caller or later when the variable is first
5104 * assigned a value.
5105 */
5106static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5107{
5108 Assert(cbType > 0 && cbType <= 64);
5109 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
5110 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
5111 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
5112 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5113 pReNative->Core.aVars[idxVar].cbVar = cbType;
5114 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5115 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5116 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
5117 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
5118 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
5119 pReNative->Core.aVars[idxVar].u.uValue = 0;
5120 return idxVar;
5121}
5122
5123
5124/**
5125 * Internal work that allocates an argument variable w/o setting enmKind.
5126 */
5127static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5128{
5129 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
5130 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5131 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
5132
5133 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5134 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
5135 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
5136 return idxVar;
5137}
5138
5139
5140/**
5141 * Changes the variable to a stack variable.
5142 *
5143 * Currently this is s only possible to do the first time the variable is used,
5144 * switching later is can be implemented but not done.
5145 *
5146 * @param pReNative The recompiler state.
5147 * @param idxVar The variable.
5148 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5149 */
5150static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5151{
5152 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5153 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5154 {
5155 /* We could in theory transition from immediate to stack as well, but it
5156 would involve the caller doing work storing the value on the stack. So,
5157 till that's required we only allow transition from invalid. */
5158 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5159 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5160 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
5161
5162 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
5163 {
5164 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
5165 {
5166 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
5167 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5168 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
5169 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5170 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
5171 return;
5172 }
5173 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;*/
5174 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
5175 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
5176 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
5177 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
5178 uint32_t bmStack = ~pReNative->Core.bmStack;
5179 while (bmStack != UINT32_MAX)
5180 {
5181 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
5182 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5183 if (!(iSlot & fBitAlignMask))
5184 {
5185 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
5186 {
5187 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
5188 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5189 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
5190 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
5191 return;
5192 }
5193 }
5194 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
5195 }
5196 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5197 }
5198 }
5199}
5200
5201
5202/**
5203 * Changes it to a variable with a constant value.
5204 *
5205 * This does not require stack storage as we know the value and can always
5206 * reload it, unless of course it's referenced.
5207 *
5208 * @param pReNative The recompiler state.
5209 * @param idxVar The variable.
5210 * @param uValue The immediate value.
5211 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5212 */
5213static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5214{
5215 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5216 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5217 {
5218 /* Only simple trasnsitions for now. */
5219 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5220 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5221 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5222 }
5223 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5224}
5225
5226
5227/**
5228 * Changes the variable to a reference (pointer) to @a idxOtherVar.
5229 *
5230 * @param pReNative The recompiler state.
5231 * @param idxVar The variable.
5232 * @param idxOtherVar The variable to take the (stack) address of.
5233 *
5234 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5235 */
5236static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5237{
5238 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5239 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5240
5241 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5242 {
5243 /* Only simple trasnsitions for now. */
5244 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5245 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5246 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5247 }
5248 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5249
5250 /* Update the other variable, ensure it's a stack variable. */
5251 /** @todo handle variables with const values... that's go boom now. */
5252 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5253 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5254}
5255
5256
5257DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5258{
5259 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5260}
5261
5262
5263DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5264{
5265 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5266 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5267 return idxVar;
5268}
5269
5270
5271DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5272{
5273 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5274 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5275 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5276 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5277
5278 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5279 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5280 return idxArgVar;
5281}
5282
5283
5284DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5285{
5286 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5287 iemNativeVarSetKindToStack(pReNative, idxVar);
5288 return idxVar;
5289}
5290
5291
5292DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5293{
5294 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5295 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5296 return idxVar;
5297}
5298
5299
5300/**
5301 * Makes sure variable @a idxVar has a register assigned to it.
5302 *
5303 * @returns The host register number.
5304 * @param pReNative The recompiler state.
5305 * @param idxVar The variable.
5306 * @param poff Pointer to the instruction buffer offset.
5307 * In case a register needs to be freed up.
5308 */
5309DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5310{
5311 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5312 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
5313
5314 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5315 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5316 {
5317 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
5318 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5319 return idxReg;
5320 }
5321
5322 /*
5323 * If the kind of variable has not yet been set, default to 'stack'.
5324 */
5325 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
5326 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5327 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
5328 iemNativeVarSetKindToStack(pReNative, idxVar);
5329
5330 /*
5331 * We have to allocate a register for the variable, even if its a stack one
5332 * as we don't know if there are modification being made to it before its
5333 * finalized (todo: analyze and insert hints about that?).
5334 *
5335 * If we can, we try get the correct register for argument variables. This
5336 * is assuming that most argument variables are fetched as close as possible
5337 * to the actual call, so that there aren't any interfering hidden calls
5338 * (memory accesses, etc) inbetween.
5339 *
5340 * If we cannot or it's a variable, we make sure no argument registers
5341 * that will be used by this MC block will be allocated here, and we always
5342 * prefer non-volatile registers to avoid needing to spill stuff for internal
5343 * call.
5344 */
5345 /** @todo Detect too early argument value fetches and warn about hidden
5346 * calls causing less optimal code to be generated in the python script. */
5347
5348 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5349 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5350 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5351 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5352 else
5353 {
5354 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5355 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5356 & ~pReNative->Core.bmHstRegsWithGstShadow
5357 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5358 & fNotArgsMask;
5359 if (fRegs)
5360 {
5361 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
5362 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5363 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5364 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5365 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5366 }
5367 else
5368 {
5369 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5370 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5371 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5372 }
5373 }
5374 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5375 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5376 return idxReg;
5377}
5378
5379
5380/**
5381 * The value of variable @a idxVar will be written in full to the @a enmGstReg
5382 * guest register.
5383 *
5384 * This function makes sure there is a register for it and sets it to be the
5385 * current shadow copy of @a enmGstReg.
5386 *
5387 * @returns The host register number.
5388 * @param pReNative The recompiler state.
5389 * @param idxVar The variable.
5390 * @param enmGstReg The guest register this variable will be written to
5391 * after this call.
5392 * @param poff Pointer to the instruction buffer offset.
5393 * In case a register needs to be freed up or if the
5394 * variable content needs to be loaded off the stack.
5395 *
5396 * @note We DO NOT expect @a idxVar to be an argument variable,
5397 * because we can only in the commit stage of an instruction when this
5398 * function is used.
5399 */
5400DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegisterForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
5401 IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
5402{
5403 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5404 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
5405 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
5406 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
5407 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
5408 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
5409 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
5410
5411 /*
5412 * This shouldn't ever be used for arguments, unless it's in a weird else
5413 * branch that doesn't do any calling and even then it's questionable.
5414 *
5415 * However, in case someone writes crazy wrong MC code and does register
5416 * updates before making calls, just use the regular register allocator to
5417 * ensure we get a register suitable for the intended argument number.
5418 */
5419 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarAllocRegister(pReNative, idxVar, poff));
5420
5421 /*
5422 * If there is already a register for the variable, we transfer/set the
5423 * guest shadow copy assignment to it.
5424 */
5425 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5426 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5427 {
5428 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
5429 {
5430 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
5431 iemNativeRegTransferGstRegShadowing(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], idxReg, enmGstReg, *poff);
5432 Log12(("iemNativeVarAllocRegisterForGuestReg: Moved %s for guest %s into %s for full write\n",
5433 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
5434 }
5435 else
5436 {
5437 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
5438 Log12(("iemNativeVarAllocRegisterForGuestReg: Marking %s as copy of guest %s (full write)\n",
5439 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
5440 }
5441 /** @todo figure this one out. We need some way of making sure the register isn't
5442 * modified after this point, just in case we start writing crappy MC code. */
5443 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
5444 return idxReg;
5445 }
5446 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
5447
5448 /*
5449 * Because this is supposed to be the commit stage, we're just tag along with the
5450 * temporary register allocator and upgrade it to a variable register.
5451 */
5452 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
5453 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
5454 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
5455 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
5456 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
5457 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5458
5459 /*
5460 * Now we need to load the register value.
5461 */
5462 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
5463 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
5464 else
5465 {
5466 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5467 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_7));
5468 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
5469 switch (pReNative->Core.aVars[idxVar].cbVar)
5470 {
5471 case sizeof(uint64_t):
5472 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
5473 break;
5474 case sizeof(uint32_t):
5475 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
5476 break;
5477 case sizeof(uint16_t):
5478 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
5479 break;
5480 case sizeof(uint8_t):
5481 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
5482 break;
5483 default:
5484 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
5485 }
5486 }
5487
5488 return idxReg;
5489}
5490
5491
5492/**
5493 * Worker that frees the stack slots for variable @a idxVar if any allocated.
5494 *
5495 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
5496 */
5497DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5498{
5499 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5500 Assert(idxStackSlot == UINT8_MAX || idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
5501 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
5502 {
5503 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
5504 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
5505 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
5506 Assert(cSlots > 0);
5507 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
5508 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
5509 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
5510 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5511 }
5512}
5513
5514
5515/**
5516 * Worker that frees a single variable.
5517 *
5518 * ASSUMES that @a idxVar is valid.
5519 */
5520DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5521{
5522 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
5523 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5524
5525 /* Free the host register first if any assigned. */
5526 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5527 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5528 {
5529 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5530 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5531 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5532 }
5533
5534 /* Free argument mapping. */
5535 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5536 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
5537 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
5538
5539 /* Free the stack slots. */
5540 iemNativeVarFreeStackSlots(pReNative, idxVar);
5541
5542 /* Free the actual variable. */
5543 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5544 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5545}
5546
5547
5548/**
5549 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
5550 */
5551DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
5552{
5553 while (bmVars != 0)
5554 {
5555 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
5556 bmVars &= ~RT_BIT_32(idxVar);
5557
5558#if 1 /** @todo optimize by simplifying this later... */
5559 iemNativeVarFreeOneWorker(pReNative, idxVar);
5560#else
5561 /* Only need to free the host register, the rest is done as bulk updates below. */
5562 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5563 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5564 {
5565 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5566 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5567 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5568 }
5569#endif
5570 }
5571#if 0 /** @todo optimize by simplifying this later... */
5572 pReNative->Core.bmVars = 0;
5573 pReNative->Core.bmStack = 0;
5574 pReNative->Core.u64ArgVars = UINT64_MAX;
5575#endif
5576}
5577
5578
5579/**
5580 * This is called by IEM_MC_END() to clean up all variables.
5581 */
5582DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
5583{
5584 uint32_t const bmVars = pReNative->Core.bmVars;
5585 if (bmVars != 0)
5586 iemNativeVarFreeAllSlow(pReNative, bmVars);
5587 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5588 Assert(pReNative->Core.bmStack == 0);
5589}
5590
5591
5592#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
5593
5594/**
5595 * This is called by IEM_MC_FREE_LOCAL.
5596 */
5597DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5598{
5599 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5600 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
5601 iemNativeVarFreeOneWorker(pReNative, idxVar);
5602}
5603
5604
5605#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
5606
5607/**
5608 * This is called by IEM_MC_FREE_ARG.
5609 */
5610DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5611{
5612 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5613 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
5614 iemNativeVarFreeOneWorker(pReNative, idxVar);
5615}
5616
5617
5618
5619/*********************************************************************************************************************************
5620* Emitters for IEM_MC_CALL_CIMPL_XXX *
5621*********************************************************************************************************************************/
5622
5623/**
5624 * Emits code to load a reference to the given guest register into @a idxGprDst.
5625 */
5626DECL_INLINE_THROW(uint32_t)
5627iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5628 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5629{
5630 /*
5631 * Get the offset relative to the CPUMCTX structure.
5632 */
5633 uint32_t offCpumCtx;
5634 switch (enmClass)
5635 {
5636 case kIemNativeGstRegRef_Gpr:
5637 Assert(idxRegInClass < 16);
5638 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5639 break;
5640
5641 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
5642 Assert(idxRegInClass < 4);
5643 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5644 break;
5645
5646 case kIemNativeGstRegRef_EFlags:
5647 Assert(idxRegInClass == 0);
5648 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5649 break;
5650
5651 case kIemNativeGstRegRef_MxCsr:
5652 Assert(idxRegInClass == 0);
5653 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5654 break;
5655
5656 case kIemNativeGstRegRef_FpuReg:
5657 Assert(idxRegInClass < 8);
5658 AssertFailed(); /** @todo what kind of indexing? */
5659 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5660 break;
5661
5662 case kIemNativeGstRegRef_MReg:
5663 Assert(idxRegInClass < 8);
5664 AssertFailed(); /** @todo what kind of indexing? */
5665 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5666 break;
5667
5668 case kIemNativeGstRegRef_XReg:
5669 Assert(idxRegInClass < 16);
5670 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5671 break;
5672
5673 default:
5674 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5675 }
5676
5677 /*
5678 * Load the value into the destination register.
5679 */
5680#ifdef RT_ARCH_AMD64
5681 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5682
5683#elif defined(RT_ARCH_ARM64)
5684 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5685 Assert(offCpumCtx < 4096);
5686 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5687
5688#else
5689# error "Port me!"
5690#endif
5691
5692 return off;
5693}
5694
5695
/**
 * Common code for CIMPL and AIMPL calls.
 *
 * These are calls that use argument variables and such.  They should not be
 * confused with internal calls required to implement an MC operation,
 * like a TLB load and similar.
 *
 * The work is done in these steps:
 *      -# Make sure every host register that will carry an argument either
 *         already holds that argument or is free.
 *      -# Load the register argument variables into their registers.
 *      -# Store any stack arguments (only when IEMNATIVE_FP_OFF_STACK_ARG0 is
 *         defined).
 *      -# Free all the argument variables.
 *      -# Flush volatile registers via iemNativeRegMoveAndFreeAndFlushAtCall.
 *
 * Upon return all that is left to do is to load any hidden arguments and
 * perform the call.  All argument variables are freed.
 *
 * @returns New code buffer offset; throws VBox status code on error.
 * @param   pReNative   The native recompile state.
 * @param   off         The code buffer offset.
 * @param   cArgs       The total number of arguments (includes hidden
 *                      count).
 * @param   cHiddenArgs The number of hidden arguments.  The hidden
 *                      arguments must not have any variable declared for
 *                      them, whereas all the regular arguments must
 *                      (tstIEMCheckMc ensures this).
 */
DECL_HIDDEN_THROW(uint32_t)
iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
{
#ifdef VBOX_STRICT
    /*
     * Assert sanity.
     */
    Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
    for (unsigned i = 0; i < cHiddenArgs; i++)
        Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
    for (unsigned i = cHiddenArgs; i < cArgs; i++)
    {
        Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
        Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
    }
#endif

    /* Number of arguments passed in registers (the remainder, if any, go on the stack). */
    uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));

    /*
     * First, go over the host registers that will be used for arguments and make
     * sure they either hold the desired argument or are free.
     */
    if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
        for (uint32_t i = 0; i < cRegArgs; i++)
        {
            uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
            if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
            {
                if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
                {
                    uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
                    Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
                    Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
                    uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
                    if (uArgNo == i)
                    { /* perfect */ }
                    else
                    {
                        /* The variable allocator logic should make sure this is impossible. */
                        AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));

                        /* A non-argument variable occupies the register: stack variables are
                           moved or spilled, the other kinds just lose their register. */
                        if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
                            off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
                        else
                        {
                            /* just free it, can be reloaded if used again */
                            pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
                            pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
                            iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
                        }
                    }
                }
                else
                    AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
                               IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
            }
        }

    Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */

    /*
     * Make sure the argument variables are loaded into their respective registers.
     *
     * We can optimize this by ASSUMING that any register allocations are for
     * registers that have already been loaded and are ready.  The previous step
     * saw to that.
     */
    if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
        for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
        {
            uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
            if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
                Assert(   pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
                       && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
                       && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
            else
            {
                uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
                if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
                {
                    /* The variable lives in some other register: copy it over and
                       transfer the allocation to the argument register. */
                    Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
                    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
                    pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
                                              | RT_BIT_32(idxArgReg);
                    pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
                }
                else
                {
                    /* Not in any register: materialize the value directly in the argument register. */
                    switch (pReNative->Core.aVars[idxVar].enmKind)
                    {
                        case kIemNativeVarKind_Stack:
                            AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
                                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
                            off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeVarCalcBpDisp(pReNative, idxVar));
                            continue;

                        case kIemNativeVarKind_Immediate:
                            off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
                            continue;

                        case kIemNativeVarKind_VarRef:
                        {
                            /* Pass the address of the stack slot of the referenced variable. */
                            uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
                            Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
                            AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
                                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
                            off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
                                                          iemNativeStackCalcBpDisp(pReNative->Core.aVars[idxOtherVar].idxStackSlot));
                            continue;
                        }

                        case kIemNativeVarKind_GstRegRef:
                            off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
                                                                 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
                                                                 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
                            continue;

                        case kIemNativeVarKind_Invalid:
                        case kIemNativeVarKind_End:
                            break;
                    }
                    /* Only reached for invalid variable kinds (valid kinds 'continue' above). */
                    AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
                }
            }
        }
#ifdef VBOX_STRICT
    else
        for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
        {
            Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
            Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
        }
#endif

#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    /*
     * If there are any stack arguments, make sure they are in their place as well.
     *
     * We can use IEMNATIVE_CALL_ARG0_GREG as temporary register since the
     * caller will load it later and it must be free (see first loop).
     */
    if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
        for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
        {
            uint8_t const idxVar    = pReNative->Core.aidxArgVars[i];
            int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
            if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
            {
                /* Already in a register: store it to the stack argument slot and release the register. */
                Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
                off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
                pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
                pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
            }
            else
            {
                /* Use ARG0 as temp for stuff we need registers for. */
                switch (pReNative->Core.aVars[idxVar].enmKind)
                {
                    case kIemNativeVarKind_Stack:
                        AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
                                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
                        off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
                                                       iemNativeVarCalcBpDisp(pReNative, idxVar));
                        off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
                        continue;

                    case kIemNativeVarKind_Immediate:
                        off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
                        continue;

                    case kIemNativeVarKind_VarRef:
                    {
                        uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
                        Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
                        AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
                                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
                        off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                                      iemNativeStackCalcBpDisp(pReNative->Core.aVars[idxOtherVar].idxStackSlot));
                        off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
                        continue;
                    }

                    case kIemNativeVarKind_GstRegRef:
                        off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                                             pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
                                                             pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
                        off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
                        continue;

                    case kIemNativeVarKind_Invalid:
                    case kIemNativeVarKind_End:
                        break;
                }
                /* Only reached for invalid variable kinds (valid kinds 'continue' above). */
                AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
            }
        }
#else
    AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
#endif

    /*
     * Free all argument variables (simplified).
     * Their lifetime always expires with the call they are for.
     */
    /** @todo Make the python script check that arguments aren't used after
     *        IEM_MC_CALL_XXXX. */
    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends
     *        requiring a IEM_MC_MEM_COMMIT_AND_UNMAP_RW after a AIMPL call,
     *        typically with an argument value.  There is also some FPU stuff. */
    for (uint32_t i = cHiddenArgs; i < cArgs; i++)
    {
        uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
        Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));

        /* no need to free registers: */
        AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
                  ?    pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
                    || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
                  : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
                  ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
                   i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));

        /* Retire the argument mapping, the variable and its stack slots. */
        pReNative->Core.aidxArgVars[i] = UINT8_MAX;
        pReNative->Core.bmVars        &= ~RT_BIT_32(idxVar);
        iemNativeVarFreeStackSlots(pReNative, idxVar);
    }
    Assert(pReNative->Core.u64ArgVars == UINT64_MAX);

    /*
     * Flush volatile registers as we make the call.
     */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);

    return off;
}
5954
5955
5956/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
5957DECL_HIDDEN_THROW(uint32_t)
5958iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5959 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
5960
5961{
5962 /*
5963 * Do all the call setup and cleanup.
5964 */
5965 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
5966
5967 /*
5968 * Load the two hidden arguments.
5969 */
5970#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5971 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5972 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5973 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
5974#else
5975 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5976 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
5977#endif
5978
5979 /*
5980 * Make the call and check the return code.
5981 *
5982 * Shadow PC copies are always flushed here, other stuff depends on flags.
5983 * Segment and general purpose registers are explictily flushed via the
5984 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
5985 * macros.
5986 */
5987 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
5988#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5989 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5990#endif
5991 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
5992 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
5993 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5994 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5995
5996 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5997}
5998
5999
6000#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6001 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
6002
6003/** Emits code for IEM_MC_CALL_CIMPL_1. */
6004DECL_INLINE_THROW(uint32_t)
6005iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6006 uintptr_t pfnCImpl, uint8_t idxArg0)
6007{
6008 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
6009 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
6010 RT_NOREF_PV(idxArg0);
6011
6012 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
6013}
6014
6015
6016#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6017 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
6018
6019/** Emits code for IEM_MC_CALL_CIMPL_2. */
6020DECL_INLINE_THROW(uint32_t)
6021iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6022 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
6023{
6024 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
6025 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
6026 RT_NOREF_PV(idxArg0);
6027
6028 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
6029 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
6030 RT_NOREF_PV(idxArg1);
6031
6032 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
6033}
6034
6035
6036#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6037 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6038 (uintptr_t)a_pfnCImpl, a0, a1, a2)
6039
6040/** Emits code for IEM_MC_CALL_CIMPL_3. */
6041DECL_INLINE_THROW(uint32_t)
6042iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6043 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
6044{
6045pReNative->pInstrBuf[off++] = 0xcc;
6046 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
6047 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
6048 RT_NOREF_PV(idxArg0);
6049
6050 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
6051 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
6052 RT_NOREF_PV(idxArg1);
6053
6054 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
6055 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
6056 RT_NOREF_PV(idxArg2);
6057
6058 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
6059}
6060
6061
6062#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
6063 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6064 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
6065
6066/** Emits code for IEM_MC_CALL_CIMPL_4. */
6067DECL_INLINE_THROW(uint32_t)
6068iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6069 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
6070{
6071pReNative->pInstrBuf[off++] = 0xcc;
6072 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
6073 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
6074 RT_NOREF_PV(idxArg0);
6075
6076 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
6077 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
6078 RT_NOREF_PV(idxArg1);
6079
6080 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
6081 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
6082 RT_NOREF_PV(idxArg2);
6083
6084 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
6085 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
6086 RT_NOREF_PV(idxArg3);
6087
6088 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
6089}
6090
6091
6092#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
6093 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6094 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
6095
6096/** Emits code for IEM_MC_CALL_CIMPL_4. */
6097DECL_INLINE_THROW(uint32_t)
6098iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6099 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
6100{
6101pReNative->pInstrBuf[off++] = 0xcc;
6102 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
6103 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
6104 RT_NOREF_PV(idxArg0);
6105
6106 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
6107 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
6108 RT_NOREF_PV(idxArg1);
6109
6110 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
6111 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
6112 RT_NOREF_PV(idxArg2);
6113
6114 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
6115 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
6116 RT_NOREF_PV(idxArg3);
6117
6118 Assert(idxArg4 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg4)));
6119 Assert(pReNative->Core.aVars[idxArg4].uArgNo == 4 + IEM_CIMPL_HIDDEN_ARGS);
6120 RT_NOREF_PV(idxArg4);
6121
6122 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
6123}
6124
6125
/**
 * Recompiler debugging: Flush guest register shadow copies.
 *
 * Expands to a plain call of iemNativeRegFlushGuestShadows with the given
 * mask; unlike the emitter statements it does not assign to 'off'.
 */
#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(a_fGstShwFlush) \
    iemNativeRegFlushGuestShadows(pReNative, a_fGstShwFlush)
6128
6129
6130
6131
6132/*********************************************************************************************************************************
6133* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
6134*********************************************************************************************************************************/
6135
6136#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
6137 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
6138
6139/** Emits code for IEM_MC_FETCH_GREG_U16. */
6140DECL_INLINE_THROW(uint32_t)
6141iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
6142{
6143 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6144 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
6145
6146 /*
6147 * We can either just load the low 16-bit of the GPR into a host register
6148 * for the variable, or we can do so via a shadow copy host register. The
6149 * latter will avoid having to reload it if it's being stored later, but
6150 * will waste a host register if it isn't touched again. Since we don't
6151 * know what going to happen, we choose the latter for now.
6152 */
6153 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6154 kIemNativeGstRegUse_ReadOnly);
6155
6156 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6157 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6158 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
6159
6160 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6161 return off;
6162}
6163
6164
6165#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
6166 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg)
6167
6168/** Emits code for IEM_MC_FETCH_GREG_U32. */
6169DECL_INLINE_THROW(uint32_t)
6170iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
6171{
6172 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6173 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint32_t));
6174
6175 /*
6176 * We can either just load the low 16-bit of the GPR into a host register
6177 * for the variable, or we can do so via a shadow copy host register. The
6178 * latter will avoid having to reload it if it's being stored later, but
6179 * will waste a host register if it isn't touched again. Since we don't
6180 * know what going to happen, we choose the latter for now.
6181 */
6182 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6183 kIemNativeGstRegUse_ReadOnly);
6184
6185 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6186 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6187 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
6188
6189 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6190 return off;
6191}
6192
6193
6194
6195/*********************************************************************************************************************************
6196* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
6197*********************************************************************************************************************************/
6198
6199#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
6200 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
6201
6202/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
6203DECL_INLINE_THROW(uint32_t)
6204iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
6205{
6206 Assert(iGRegEx < 20);
6207 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
6208 kIemNativeGstRegUse_ForUpdate);
6209#ifdef RT_ARCH_AMD64
6210 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6211
6212 /* To the lowest byte of the register: mov r8, imm8 */
6213 if (iGRegEx < 16)
6214 {
6215 if (idxGstTmpReg >= 8)
6216 pbCodeBuf[off++] = X86_OP_REX_B;
6217 else if (idxGstTmpReg >= 4)
6218 pbCodeBuf[off++] = X86_OP_REX;
6219 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
6220 pbCodeBuf[off++] = u8Value;
6221 }
6222 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
6223 else if (idxGstTmpReg < 4)
6224 {
6225 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
6226 pbCodeBuf[off++] = u8Value;
6227 }
6228 else
6229 {
6230 /* ror reg64, 8 */
6231 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
6232 pbCodeBuf[off++] = 0xc1;
6233 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6234 pbCodeBuf[off++] = 8;
6235
6236 /* mov reg8, imm8 */
6237 if (idxGstTmpReg >= 8)
6238 pbCodeBuf[off++] = X86_OP_REX_B;
6239 else if (idxGstTmpReg >= 4)
6240 pbCodeBuf[off++] = X86_OP_REX;
6241 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
6242 pbCodeBuf[off++] = u8Value;
6243
6244 /* rol reg64, 8 */
6245 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
6246 pbCodeBuf[off++] = 0xc1;
6247 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
6248 pbCodeBuf[off++] = 8;
6249 }
6250
6251#elif defined(RT_ARCH_ARM64)
6252 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
6253 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6254 if (iGRegEx < 16)
6255 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
6256 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
6257 else
6258 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
6259 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
6260 iemNativeRegFreeTmp(pReNative, idxImmReg);
6261
6262#else
6263# error "Port me!"
6264#endif
6265
6266 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6267
6268 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
6269
6270 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6271 return off;
6272}
6273
6274
6275#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
6276 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
6277
6278/** Emits code for IEM_MC_STORE_GREG_U16. */
6279DECL_INLINE_THROW(uint32_t)
6280iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
6281{
6282 Assert(iGReg < 16);
6283 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6284 kIemNativeGstRegUse_ForUpdate);
6285#ifdef RT_ARCH_AMD64
6286 /* mov reg16, imm16 */
6287 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6288 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6289 if (idxGstTmpReg >= 8)
6290 pbCodeBuf[off++] = X86_OP_REX_B;
6291 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
6292 pbCodeBuf[off++] = RT_BYTE1(uValue);
6293 pbCodeBuf[off++] = RT_BYTE2(uValue);
6294
6295#elif defined(RT_ARCH_ARM64)
6296 /* movk xdst, #uValue, lsl #0 */
6297 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6298 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
6299
6300#else
6301# error "Port me!"
6302#endif
6303
6304 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6305
6306 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6307 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6308 return off;
6309}
6310
6311
6312#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
6313 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
6314
6315/** Emits code for IEM_MC_STORE_GREG_U16. */
6316DECL_INLINE_THROW(uint32_t)
6317iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6318{
6319 Assert(iGReg < 16);
6320 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6321
6322 /*
6323 * If it's a constant value (unlikely) we treat this as a
6324 * IEM_MC_STORE_GREG_U16_CONST statement.
6325 */
6326 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6327 { /* likely */ }
6328 else
6329 {
6330 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
6331 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6332 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
6333 }
6334
6335 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6336 kIemNativeGstRegUse_ForUpdate);
6337
6338#ifdef RT_ARCH_AMD64
6339 /* mov reg16, reg16 or [mem16] */
6340 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6341 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6342 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6343 {
6344 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
6345 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
6346 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
6347 pbCodeBuf[off++] = 0x8b;
6348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
6349 }
6350 else
6351 {
6352 AssertStmt(pReNative->Core.aVars[idxValueVar].idxStackSlot != UINT8_MAX,
6353 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
6354 if (idxGstTmpReg >= 8)
6355 pbCodeBuf[off++] = X86_OP_REX_R;
6356 pbCodeBuf[off++] = 0x8b;
6357 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeVarCalcBpDisp(pReNative, idxValueVar), pReNative);
6358 }
6359
6360#elif defined(RT_ARCH_ARM64)
6361 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
6362 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6363 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6364 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
6365
6366#else
6367# error "Port me!"
6368#endif
6369
6370 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6371
6372 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6373 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6374 return off;
6375}
6376
6377
6378#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
6379 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
6380
6381/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
6382DECL_INLINE_THROW(uint32_t)
6383iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
6384{
6385 Assert(iGReg < 16);
6386 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6387 kIemNativeGstRegUse_ForFullWrite);
6388 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
6389 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6390 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6391 return off;
6392}
6393
6394
6395#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
6396 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
6397
6398/** Emits code for IEM_MC_STORE_GREG_U32. */
6399DECL_INLINE_THROW(uint32_t)
6400iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6401{
6402 Assert(iGReg < 16);
6403 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6404
6405 /*
6406 * If it's a constant value (unlikely) we treat this as a
6407 * IEM_MC_STORE_GREG_U32_CONST statement.
6408 */
6409 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6410 { /* likely */ }
6411 else
6412 {
6413 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
6414 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6415 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
6416 }
6417
6418 /*
6419 * For the rest we allocate a guest register for the variable and writes
6420 * it to the CPUMCTX structure.
6421 */
6422 uint8_t const idxVarReg = iemNativeVarAllocRegisterForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
6423 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6424#ifdef VBOX_STRICT
6425 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
6426#endif
6427 return off;
6428}
6429
6430
6431
6432#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
6433 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
6434
6435/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
6436DECL_INLINE_THROW(uint32_t)
6437iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
6438{
6439 Assert(iGReg < 16);
6440 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6441 kIemNativeGstRegUse_ForUpdate);
6442 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
6443 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6444 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6445 return off;
6446}
6447
6448
6449
6450/*********************************************************************************************************************************
6451* General purpose register manipulation (add, sub). *
6452*********************************************************************************************************************************/
6453
6454#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
6455 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
6456
6457/** Emits code for IEM_MC_SUB_GREG_U16. */
6458DECL_INLINE_THROW(uint32_t)
6459iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
6460{
6461 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6462 kIemNativeGstRegUse_ForUpdate);
6463
6464#ifdef RT_ARCH_AMD64
6465 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6466 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6467 if (idxGstTmpReg >= 8)
6468 pbCodeBuf[off++] = X86_OP_REX_B;
6469 if (uSubtrahend)
6470 {
6471 pbCodeBuf[off++] = 0xff; /* dec */
6472 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6473 }
6474 else
6475 {
6476 pbCodeBuf[off++] = 0x81;
6477 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6478 pbCodeBuf[off++] = uSubtrahend;
6479 pbCodeBuf[off++] = 0;
6480 }
6481
6482#else
6483 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6484 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6485
6486 /* sub tmp, gstgrp, uSubtrahend */
6487 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
6488
6489 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
6490 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
6491
6492 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6493#endif
6494
6495 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6496
6497 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6498
6499 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6500 return off;
6501}
6502
6503
6504#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
6505 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
6506
6507#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
6508 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
6509
6510/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
6511DECL_INLINE_THROW(uint32_t)
6512iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
6513{
6514 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6515 kIemNativeGstRegUse_ForUpdate);
6516
6517#ifdef RT_ARCH_AMD64
6518 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6519 if (f64Bit)
6520 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
6521 else if (idxGstTmpReg >= 8)
6522 pbCodeBuf[off++] = X86_OP_REX_B;
6523 if (uSubtrahend == 1)
6524 {
6525 /* dec */
6526 pbCodeBuf[off++] = 0xff;
6527 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6528 }
6529 else if (uSubtrahend < 128)
6530 {
6531 pbCodeBuf[off++] = 0x83; /* sub */
6532 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6533 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6534 }
6535 else
6536 {
6537 pbCodeBuf[off++] = 0x81; /* sub */
6538 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6539 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6540 pbCodeBuf[off++] = 0;
6541 pbCodeBuf[off++] = 0;
6542 pbCodeBuf[off++] = 0;
6543 }
6544
6545#else
6546 /* sub tmp, gstgrp, uSubtrahend */
6547 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6548 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
6549
6550#endif
6551
6552 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6553
6554 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6555
6556 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6557 return off;
6558}
6559
6560
6561
6562/*********************************************************************************************************************************
6563* Effective Address Calculation *
6564*********************************************************************************************************************************/
6565#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6566 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6567
6568/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6569 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6570DECL_INLINE_THROW(uint32_t)
6571iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6572 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6573{
6574 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6575 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
6576
6577 /* Handle the disp16 form with no registers first. */
6578 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6579 return iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u16Disp);
6580
6581 /* Determin the displacment. */
6582 uint16_t u16EffAddr;
6583 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6584 {
6585 case 0: u16EffAddr = 0; break;
6586 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6587 case 2: u16EffAddr = u16Disp; break;
6588 default: AssertFailedStmt(u16EffAddr = 0);
6589 }
6590
6591 /* Determine the registers involved. */
6592 uint8_t idxGstRegBase;
6593 uint8_t idxGstRegIndex;
6594 switch (bRm & X86_MODRM_RM_MASK)
6595 {
6596 case 0:
6597 idxGstRegBase = X86_GREG_xBX;
6598 idxGstRegIndex = X86_GREG_xSI;
6599 break;
6600 case 1:
6601 idxGstRegBase = X86_GREG_xBX;
6602 idxGstRegIndex = X86_GREG_xDI;
6603 break;
6604 case 2:
6605 idxGstRegBase = X86_GREG_xBP;
6606 idxGstRegIndex = X86_GREG_xSI;
6607 break;
6608 case 3:
6609 idxGstRegBase = X86_GREG_xBP;
6610 idxGstRegIndex = X86_GREG_xDI;
6611 break;
6612 case 4:
6613 idxGstRegBase = X86_GREG_xSI;
6614 idxGstRegIndex = UINT8_MAX;
6615 break;
6616 case 5:
6617 idxGstRegBase = X86_GREG_xDI;
6618 idxGstRegIndex = UINT8_MAX;
6619 break;
6620 case 6:
6621 idxGstRegBase = X86_GREG_xBP;
6622 idxGstRegIndex = UINT8_MAX;
6623 break;
6624#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6625 default:
6626#endif
6627 case 7:
6628 idxGstRegBase = X86_GREG_xBX;
6629 idxGstRegIndex = UINT8_MAX;
6630 break;
6631 }
6632
6633 /*
6634 * Now calculate: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6635 */
6636 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6637 kIemNativeGstRegUse_ReadOnly);
6638 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6639 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6640 kIemNativeGstRegUse_ReadOnly)
6641 : UINT8_MAX;
6642#ifdef RT_ARCH_AMD64
6643 if (idxRegIndex == UINT8_MAX)
6644 {
6645 if (u16EffAddr == 0)
6646 {
6647 /* movxz ret, base */
6648 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6649 }
6650 else
6651 {
6652 /* lea ret32, [base64 + disp32] */
6653 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6654 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6655 if (idxRegRet >= 8 || idxRegBase >= 8)
6656 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6657 pbCodeBuf[off++] = 0x8d;
6658 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6659 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6660 else
6661 {
6662 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6663 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6664 }
6665 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6666 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6667 pbCodeBuf[off++] = 0;
6668 pbCodeBuf[off++] = 0;
6669 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6670
6671 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6672 }
6673 }
6674 else
6675 {
6676 /* lea ret32, [index64 + base64 (+ disp32)] */
6677 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6678 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6679 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6680 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6681 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6682 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6683 pbCodeBuf[off++] = 0x8d;
6684 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6685 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6686 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6687 if (bMod == X86_MOD_MEM4)
6688 {
6689 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6690 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6691 pbCodeBuf[off++] = 0;
6692 pbCodeBuf[off++] = 0;
6693 }
6694 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6695 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6696 }
6697
6698#elif defined(RT_ARCH_ARM64)
6699 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6700 if (u16EffAddr == 0)
6701 {
6702 if (idxRegIndex == UINT8_MAX)
6703 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6704 else
6705 {
6706 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6707 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6708 }
6709 }
6710 else
6711 {
6712 if ((int16_t)u16Disp < 4096 && (int16_t)u16Disp >= 0)
6713 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16Disp, false /*f64Bit*/);
6714 else if ((int16_t)u16Disp > -4096 && (int16_t)u16Disp < 0)
6715 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6716 (uint16_t)-(int16_t)u16Disp, false /*f64Bit*/);
6717 else
6718 {
6719 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16Disp);
6720 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6721 }
6722 if (idxRegIndex != UINT8_MAX)
6723 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6724 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6725 }
6726
6727#else
6728# error "port me"
6729#endif
6730
6731 if (idxRegIndex != UINT8_MAX)
6732 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6733 iemNativeRegFreeTmp(pReNative, idxRegBase);
6734 return off;
6735}
6736
6737
/*
 * Effective address calculation statements for 32-bit and 64-bit addressing.
 *
 * Each one expands to an assignment of 'off' from the matching emitter; the
 * _FSGS and _ADDR32 flavours pass one argument more (64 and 32 respectively)
 * than the plain 64-bit one.
 * NOTE(review): the emitters called here are not defined in this part of the
 * file - confirm that they exist and accept these argument lists.
 */
#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
    off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, \
                                                   a_u32Disp, a_GCPtrEff)

#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
                                                   a_u32Disp, a_cbImm, a_GCPtrEff)

#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
                                                   a_u32Disp, a_cbImm, a_GCPtrEff, 64)

#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
                                                   a_u32Disp, a_cbImm, a_GCPtrEff, 32)
6749
6750
6751
6752/*********************************************************************************************************************************
6753* Builtin functions *
6754*********************************************************************************************************************************/
6755
6756/**
6757 * Built-in function that calls a C-implemention function taking zero arguments.
6758 */
6759static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
6760{
6761 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
6762 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
6763 uint64_t const fGstShwFlush = (uint8_t)pCallEntry->auParams[2];
6764 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
6765}
6766
6767
6768/**
6769 * Built-in function that checks for pending interrupts that can be delivered or
6770 * forced action flags.
6771 *
6772 * This triggers after the completion of an instruction, so EIP is already at
6773 * the next instruction. If an IRQ or important FF is pending, this will return
6774 * a non-zero status that stops TB execution.
6775 */
6776static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
6777{
6778 RT_NOREF(pCallEntry);
6779
6780 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
6781 and I'm too lazy to create a 'Fixed' version of that one. */
6782 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
6783 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
6784
6785 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
6786
6787 /* Again, we need to load the extended EFLAGS before we actually need them
6788 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
6789 loaded them inside the check, as the shadow state would not be correct
6790 when the code branches before the load. Ditto PC. */
6791 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6792 kIemNativeGstRegUse_ReadOnly);
6793
6794 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
6795
6796 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6797
6798 /*
6799 * Start by checking the local forced actions of the EMT we're on for IRQs
6800 * and other FFs that needs servicing.
6801 */
6802 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
6803 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
6804 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
6805 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
6806 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
6807 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
6808 | VMCPU_FF_TLB_FLUSH
6809 | VMCPU_FF_UNHALT ),
6810 true /*fSetFlags*/);
6811 /* If we end up with ZERO in idxTmpReg there is nothing to do.*/
6812 uint32_t const offFixupJumpToVmCheck1 = off;
6813 off = iemNativeEmitJzToFixed(pReNative, off, 0);
6814
6815 /* Some relevant FFs are set, but if's only APIC or/and PIC being set,
6816 these may be supressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
6817 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
6818 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
6819 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
6820 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6821
6822 /* So, it's only interrupt releated FFs and we need to see if IRQs are being
6823 suppressed by the CPU or not. */
6824 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
6825 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
6826 idxLabelReturnBreak);
6827
6828 /* We've got shadow flags set, so we must check that the PC they are valid
6829 for matches our current PC value. */
6830 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
6831 * a register. */
6832 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
6833 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
6834
6835 /*
6836 * Now check the force flags of the VM.
6837 */
6838 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
6839 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
6840 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
6841 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
6842 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
6843 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6844
6845 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
6846
6847 /*
6848 * We're good, no IRQs or FFs pending.
6849 */
6850 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6851 iemNativeRegFreeTmp(pReNative, idxEflReg);
6852 iemNativeRegFreeTmp(pReNative, idxPcReg);
6853
6854 return off;
6855}
6856
6857
6858/**
6859 * Built-in function checks if IEMCPU::fExec has the expected value.
6860 */
6861static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
6862{
6863 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
6864 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6865
6866 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6867 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
6868 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
6869 kIemNativeLabelType_ReturnBreak);
6870 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6871 return off;
6872}
6873
6874
6875
6876/*********************************************************************************************************************************
6877* The native code generator functions for each MC block. *
6878*********************************************************************************************************************************/
6879
6880
6881/*
6882 * Include g_apfnIemNativeRecompileFunctions and associated functions.
6883 *
6884 * This should probably live in its own file later, but let's see what the
6885 * compile times turn out to be first.
6886 */
6887#include "IEMNativeFunctions.cpp.h"
6888
6889
6890
6891/*********************************************************************************************************************************
6892* Recompiler Core. *
6893*********************************************************************************************************************************/
6894
6895
6896/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
6897static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
6898{
6899 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
6900 pDis->cbCachedInstr += cbMaxRead;
6901 RT_NOREF(cbMinRead);
6902 return VERR_NO_DATA;
6903}
6904
6905
6906/**
6907 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
6908 * @returns pszBuf.
6909 * @param fFlags The flags.
6910 * @param pszBuf The output buffer.
6911 * @param cbBuf The output buffer size. At least 32 bytes.
6912 */
6913DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
6914{
6915 Assert(cbBuf >= 32);
6916 static RTSTRTUPLE const s_aModes[] =
6917 {
6918 /* [00] = */ { RT_STR_TUPLE("16BIT") },
6919 /* [01] = */ { RT_STR_TUPLE("32BIT") },
6920 /* [02] = */ { RT_STR_TUPLE("!2!") },
6921 /* [03] = */ { RT_STR_TUPLE("!3!") },
6922 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
6923 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
6924 /* [06] = */ { RT_STR_TUPLE("!6!") },
6925 /* [07] = */ { RT_STR_TUPLE("!7!") },
6926 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
6927 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
6928 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
6929 /* [0b] = */ { RT_STR_TUPLE("!b!") },
6930 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
6931 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
6932 /* [0e] = */ { RT_STR_TUPLE("!e!") },
6933 /* [0f] = */ { RT_STR_TUPLE("!f!") },
6934 /* [10] = */ { RT_STR_TUPLE("!10!") },
6935 /* [11] = */ { RT_STR_TUPLE("!11!") },
6936 /* [12] = */ { RT_STR_TUPLE("!12!") },
6937 /* [13] = */ { RT_STR_TUPLE("!13!") },
6938 /* [14] = */ { RT_STR_TUPLE("!14!") },
6939 /* [15] = */ { RT_STR_TUPLE("!15!") },
6940 /* [16] = */ { RT_STR_TUPLE("!16!") },
6941 /* [17] = */ { RT_STR_TUPLE("!17!") },
6942 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
6943 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
6944 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
6945 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
6946 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
6947 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
6948 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
6949 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
6950 };
6951 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
6952 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
6953 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
6954
6955 pszBuf[off++] = ' ';
6956 pszBuf[off++] = 'C';
6957 pszBuf[off++] = 'P';
6958 pszBuf[off++] = 'L';
6959 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
6960 Assert(off < 32);
6961
6962 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
6963
6964 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
6965 {
6966 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
6967 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
6968 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
6969 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
6970 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
6971 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
6972 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
6973 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
6974 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
6975 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
6976 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
6977 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
6978 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
6979 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
6980 };
6981 if (fFlags)
6982 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
6983 if (s_aFlags[i].fFlag & fFlags)
6984 {
6985 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
6986 pszBuf[off++] = ' ';
6987 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
6988 off += s_aFlags[i].cchName;
6989 fFlags &= ~s_aFlags[i].fFlag;
6990 if (!fFlags)
6991 break;
6992 }
6993 pszBuf[off] = '\0';
6994
6995 return pszBuf;
6996}
6997
6998
6999DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
7000{
7001 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
7002
7003 char szDisBuf[512];
7004 DISSTATE Dis;
7005 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
7006 uint32_t const cNative = pTb->Native.cInstructions;
7007 uint32_t offNative = 0;
7008#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7009 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
7010#endif
7011 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
7012 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
7013 : DISCPUMODE_64BIT;
7014#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7015 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
7016#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7017 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
7018#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7019# error "Port me"
7020#else
7021 csh hDisasm = ~(size_t)0;
7022# if defined(RT_ARCH_AMD64)
7023 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
7024# elif defined(RT_ARCH_ARM64)
7025 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
7026# else
7027# error "Port me"
7028# endif
7029 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
7030#endif
7031
7032 /*
7033 * Print TB info.
7034 */
7035 pHlp->pfnPrintf(pHlp,
7036 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
7037 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
7038 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
7039 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
7040#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7041 if (pDbgInfo && pDbgInfo->cEntries > 1)
7042 {
7043 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
7044
7045 /*
7046 * This disassembly is driven by the debug info which follows the native
7047 * code and indicates when it starts with the next guest instructions,
7048 * where labels are and such things.
7049 */
7050 uint32_t idxThreadedCall = 0;
7051 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
7052 uint8_t idxRange = UINT8_MAX;
7053 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
7054 uint32_t offRange = 0;
7055 uint32_t offOpcodes = 0;
7056 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
7057 uint32_t const cDbgEntries = pDbgInfo->cEntries;
7058 uint32_t iDbgEntry = 1;
7059 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
7060
7061 while (offNative < cNative)
7062 {
7063 /* If we're at or have passed the point where the next chunk of debug
7064 info starts, process it. */
7065 if (offDbgNativeNext <= offNative)
7066 {
7067 offDbgNativeNext = UINT32_MAX;
7068 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
7069 {
7070 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
7071 {
7072 case kIemTbDbgEntryType_GuestInstruction:
7073 {
7074 /* Did the exec flag change? */
7075 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
7076 {
7077 pHlp->pfnPrintf(pHlp,
7078 " fExec change %#08x -> %#08x %s\n",
7079 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
7080 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
7081 szDisBuf, sizeof(szDisBuf)));
7082 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
7083 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
7084 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
7085 : DISCPUMODE_64BIT;
7086 }
7087
7088 /* New opcode range? We need to fend up a spurious debug info entry here for cases
7089 where the compilation was aborted before the opcode was recorded and the actual
7090 instruction was translated to a threaded call. This may happen when we run out
7091 of ranges, or when some complicated interrupts/FFs are found to be pending or
7092 similar. So, we just deal with it here rather than in the compiler code as it
7093 is a lot simpler to do up here. */
7094 if ( idxRange == UINT8_MAX
7095 || idxRange >= cRanges
7096 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
7097 {
7098 idxRange += 1;
7099 if (idxRange < cRanges)
7100 offRange = 0;
7101 else
7102 continue;
7103 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
7104 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
7105 + (pTb->aRanges[idxRange].idxPhysPage == 0
7106 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
7107 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
7108 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
7109 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
7110 pTb->aRanges[idxRange].idxPhysPage);
7111 }
7112
7113 /* Disassemble the instruction. */
7114 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
7115 uint32_t cbInstr = 1;
7116 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
7117 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
7118 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
7119 if (RT_SUCCESS(rc))
7120 {
7121 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
7122 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
7123 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7124 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7125
7126 static unsigned const s_offMarker = 55;
7127 static char const s_szMarker[] = " ; <--- guest";
7128 if (cch < s_offMarker)
7129 {
7130 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
7131 cch = s_offMarker;
7132 }
7133 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
7134 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
7135
7136 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
7137 }
7138 else
7139 {
7140 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
7141 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
7142 cbInstr = 1;
7143 }
7144 GCPhysPc += cbInstr;
7145 offOpcodes += cbInstr;
7146 offRange += cbInstr;
7147 continue;
7148 }
7149
7150 case kIemTbDbgEntryType_ThreadedCall:
7151 pHlp->pfnPrintf(pHlp,
7152 " Call #%u to %s (%u args)%s\n",
7153 idxThreadedCall,
7154 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
7155 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
7156 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
7157 idxThreadedCall++;
7158 continue;
7159
7160 case kIemTbDbgEntryType_GuestRegShadowing:
7161 {
7162 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
7163 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
7164 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
7165 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
7166 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
7167 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
7168 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
7169 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
7170 else
7171 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
7172 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
7173 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
7174 continue;
7175 }
7176
7177 case kIemTbDbgEntryType_Label:
7178 {
7179 const char *pszName = "what_the_fudge";
7180 const char *pszComment = "";
7181 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
7182 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
7183 {
7184 case kIemNativeLabelType_Return:
7185 pszName = "Return";
7186 break;
7187 case kIemNativeLabelType_ReturnBreak:
7188 pszName = "ReturnBreak";
7189 break;
7190 case kIemNativeLabelType_ReturnWithFlags:
7191 pszName = "ReturnWithFlags";
7192 break;
7193 case kIemNativeLabelType_NonZeroRetOrPassUp:
7194 pszName = "NonZeroRetOrPassUp";
7195 break;
7196 case kIemNativeLabelType_RaiseGp0:
7197 pszName = "RaiseGp0";
7198 break;
7199 case kIemNativeLabelType_If:
7200 pszName = "If";
7201 fNumbered = true;
7202 break;
7203 case kIemNativeLabelType_Else:
7204 pszName = "Else";
7205 fNumbered = true;
7206 pszComment = " ; regs state restored pre-if-block";
7207 break;
7208 case kIemNativeLabelType_Endif:
7209 pszName = "Endif";
7210 fNumbered = true;
7211 break;
7212 case kIemNativeLabelType_CheckIrq:
7213 pszName = "CheckIrq_CheckVM";
7214 fNumbered = true;
7215 break;
7216 case kIemNativeLabelType_Invalid:
7217 case kIemNativeLabelType_End:
7218 break;
7219 }
7220 if (fNumbered)
7221 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
7222 else
7223 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
7224 continue;
7225 }
7226
7227 case kIemTbDbgEntryType_NativeOffset:
7228 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
7229 Assert(offDbgNativeNext > offNative);
7230 break;
7231
7232 default:
7233 AssertFailed();
7234 }
7235 iDbgEntry++;
7236 break;
7237 }
7238 }
7239
7240 /*
7241 * Disassemble the next native instruction.
7242 */
7243 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
7244# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
7245 uint32_t cbInstr = sizeof(paNative[0]);
7246 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
7247 if (RT_SUCCESS(rc))
7248 {
7249# if defined(RT_ARCH_AMD64)
7250 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
7251 {
7252 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
7253 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
7254 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
7255 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
7256 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
7257 uInfo & 0x8000 ? " - recompiled" : "");
7258 else
7259 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
7260 }
7261 else
7262# endif
7263 {
7264# ifdef RT_ARCH_AMD64
7265 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
7266 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
7267 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7268 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7269# elif defined(RT_ARCH_ARM64)
7270 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
7271 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7272 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7273# else
7274# error "Port me"
7275# endif
7276 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
7277 }
7278 }
7279 else
7280 {
7281# if defined(RT_ARCH_AMD64)
7282 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
7283 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
7284# elif defined(RT_ARCH_ARM64)
7285 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
7286# else
7287# error "Port me"
7288# endif
7289 cbInstr = sizeof(paNative[0]);
7290 }
7291 offNative += cbInstr / sizeof(paNative[0]);
7292
7293# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
7294 cs_insn *pInstr;
7295 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
7296 (uintptr_t)pNativeCur, 1, &pInstr);
7297 if (cInstrs > 0)
7298 {
7299 Assert(cInstrs == 1);
7300# if defined(RT_ARCH_AMD64)
7301 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
7302 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
7303# else
7304 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
7305 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
7306# endif
7307 offNative += pInstr->size / sizeof(*pNativeCur);
7308 cs_free(pInstr, cInstrs);
7309 }
7310 else
7311 {
7312# if defined(RT_ARCH_AMD64)
7313 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
7314 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm)));
7315# else
7316 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
7317# endif
7318 offNative++;
7319 }
7320# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
7321 }
7322 }
7323 else
7324#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
7325 {
7326 /*
7327 * No debug info, just disassemble the x86 code and then the native code.
7328 *
7329 * First the guest code:
7330 */
7331 for (unsigned i = 0; i < pTb->cRanges; i++)
7332 {
7333 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
7334 + (pTb->aRanges[i].idxPhysPage == 0
7335 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
7336 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
7337 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
7338 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
7339 unsigned off = pTb->aRanges[i].offOpcodes;
7340 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
7341 while (off < cbOpcodes)
7342 {
7343 uint32_t cbInstr = 1;
7344 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
7345 &pTb->pabOpcodes[off], cbOpcodes - off,
7346 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
7347 if (RT_SUCCESS(rc))
7348 {
7349 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
7350 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
7351 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7352 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7353 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
7354 GCPhysPc += cbInstr;
7355 off += cbInstr;
7356 }
7357 else
7358 {
7359 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
7360 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
7361 break;
7362 }
7363 }
7364 }
7365
7366 /*
7367 * Then the native code:
7368 */
7369 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
7370 while (offNative < cNative)
7371 {
7372 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
7373# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
7374 uint32_t cbInstr = sizeof(paNative[0]);
7375 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
7376 if (RT_SUCCESS(rc))
7377 {
7378# if defined(RT_ARCH_AMD64)
7379 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
7380 {
7381 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
7382 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
7383 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
7384 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
7385 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
7386 uInfo & 0x8000 ? " - recompiled" : "");
7387 else
7388 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
7389 }
7390 else
7391# endif
7392 {
7393# ifdef RT_ARCH_AMD64
7394 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
7395 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
7396 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7397 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7398# elif defined(RT_ARCH_ARM64)
7399 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
7400 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7401 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7402# else
7403# error "Port me"
7404# endif
7405 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
7406 }
7407 }
7408 else
7409 {
7410# if defined(RT_ARCH_AMD64)
7411 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
7412 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
7413# else
7414 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
7415# endif
7416 cbInstr = sizeof(paNative[0]);
7417 }
7418 offNative += cbInstr / sizeof(paNative[0]);
7419
7420# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
7421 cs_insn *pInstr;
7422 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
7423 (uintptr_t)pNativeCur, 1, &pInstr);
7424 if (cInstrs > 0)
7425 {
7426 Assert(cInstrs == 1);
7427# if defined(RT_ARCH_AMD64)
7428 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
7429 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
7430# else
7431 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
7432 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
7433# endif
7434 offNative += pInstr->size / sizeof(*pNativeCur);
7435 cs_free(pInstr, cInstrs);
7436 }
7437 else
7438 {
7439# if defined(RT_ARCH_AMD64)
7440 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
7441 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm)));
7442# else
7443 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
7444# endif
7445 offNative++;
7446 }
7447# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
7448 }
7449 }
7450
7451#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
7452 /* Cleanup. */
7453 cs_close(&hDisasm);
7454#endif
7455}
7456
7457
7458/**
7459 * Recompiles the given threaded TB into a native one.
7460 *
7461 * In case of failure the translation block will be returned as-is.
7462 *
7463 * @returns pTb.
7464 * @param pVCpu The cross context virtual CPU structure of the calling
7465 * thread.
7466 * @param pTb The threaded translation to recompile to native.
7467 */
7468DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
7469{
7470 /*
7471 * The first time thru, we allocate the recompiler state, the other times
7472 * we just need to reset it before using it again.
7473 */
7474 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
7475 if (RT_LIKELY(pReNative))
7476 iemNativeReInit(pReNative, pTb);
7477 else
7478 {
7479 pReNative = iemNativeInit(pVCpu, pTb);
7480 AssertReturn(pReNative, pTb);
7481 }
7482
7483 /*
7484 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
7485 * for aborting if an error happens.
7486 */
7487 uint32_t cCallsLeft = pTb->Thrd.cCalls;
7488#ifdef LOG_ENABLED
7489 uint32_t const cCallsOrg = cCallsLeft;
7490#endif
7491 uint32_t off = 0;
7492 int rc = VINF_SUCCESS;
7493 IEMNATIVE_TRY_SETJMP(pReNative, rc)
7494 {
7495 /*
7496 * Emit prolog code (fixed).
7497 */
7498 off = iemNativeEmitProlog(pReNative, off);
7499
7500 /*
7501 * Convert the calls to native code.
7502 */
7503#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7504 int32_t iGstInstr = -1;
7505 uint32_t fExec = pTb->fFlags;
7506#endif
7507 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
7508 while (cCallsLeft-- > 0)
7509 {
7510 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
7511
7512 /*
7513 * Debug info and assembly markup.
7514 */
7515#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7516 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
7517 fExec = pCallEntry->auParams[0];
7518 iemNativeDbgInfoAddNativeOffset(pReNative, off);
7519 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
7520 {
7521 if (iGstInstr < (int32_t)pTb->cInstructions)
7522 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
7523 else
7524 Assert(iGstInstr == pTb->cInstructions);
7525 iGstInstr = pCallEntry->idxInstr;
7526 }
7527 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
7528#endif
7529#if defined(VBOX_STRICT)
7530 off = iemNativeEmitMarker(pReNative, off,
7531 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
7532 pCallEntry->enmFunction));
7533#endif
7534#if defined(VBOX_STRICT)
7535 iemNativeRegAssertSanity(pReNative);
7536#endif
7537
7538 /*
7539 * Actual work.
7540 */
7541 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
7542 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "" : "(todo)"));
7543 if (pfnRecom) /** @todo stats on this. */
7544 {
7545 //STAM_COUNTER_INC()
7546 off = pfnRecom(pReNative, off, pCallEntry);
7547 }
7548 else
7549 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
7550 Assert(off <= pReNative->cInstrBufAlloc);
7551 Assert(pReNative->cCondDepth == 0);
7552
7553 /*
7554 * Advance.
7555 */
7556 pCallEntry++;
7557 }
7558
7559 /*
7560 * Emit the epilog code.
7561 */
7562 uint32_t idxReturnLabel;
7563 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
7564
7565 /*
7566 * Generate special jump labels.
7567 */
7568 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
7569 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
7570 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
7571 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
7572 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
7573 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
7574 }
7575 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
7576 {
7577 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
7578 return pTb;
7579 }
7580 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
7581 Assert(off <= pReNative->cInstrBufAlloc);
7582
7583 /*
7584 * Make sure all labels has been defined.
7585 */
7586 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
7587#ifdef VBOX_STRICT
7588 uint32_t const cLabels = pReNative->cLabels;
7589 for (uint32_t i = 0; i < cLabels; i++)
7590 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
7591#endif
7592
7593 /*
7594 * Allocate executable memory, copy over the code we've generated.
7595 */
7596 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
7597 if (pTbAllocator->pDelayedFreeHead)
7598 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
7599
7600 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
7601 AssertReturn(paFinalInstrBuf, pTb);
7602 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
7603
7604 /*
7605 * Apply fixups.
7606 */
7607 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
7608 uint32_t const cFixups = pReNative->cFixups;
7609 for (uint32_t i = 0; i < cFixups; i++)
7610 {
7611 Assert(paFixups[i].off < off);
7612 Assert(paFixups[i].idxLabel < cLabels);
7613 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
7614 switch (paFixups[i].enmType)
7615 {
7616#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7617 case kIemNativeFixupType_Rel32:
7618 Assert(paFixups[i].off + 4 <= off);
7619 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
7620 continue;
7621
7622#elif defined(RT_ARCH_ARM64)
7623 case kIemNativeFixupType_RelImm26At0:
7624 {
7625 Assert(paFixups[i].off < off);
7626 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
7627 Assert(offDisp >= -262144 && offDisp < 262144);
7628 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
7629 continue;
7630 }
7631
7632 case kIemNativeFixupType_RelImm19At5:
7633 {
7634 Assert(paFixups[i].off < off);
7635 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
7636 Assert(offDisp >= -262144 && offDisp < 262144);
7637 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
7638 continue;
7639 }
7640
7641 case kIemNativeFixupType_RelImm14At5:
7642 {
7643 Assert(paFixups[i].off < off);
7644 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
7645 Assert(offDisp >= -8192 && offDisp < 8192);
7646 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
7647 continue;
7648 }
7649
7650#endif
7651 case kIemNativeFixupType_Invalid:
7652 case kIemNativeFixupType_End:
7653 break;
7654 }
7655 AssertFailed();
7656 }
7657
7658 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
7659
7660 /*
7661 * Convert the translation block.
7662 */
7663 //RT_BREAKPOINT();
7664 RTMemFree(pTb->Thrd.paCalls);
7665 pTb->Native.paInstructions = paFinalInstrBuf;
7666 pTb->Native.cInstructions = off;
7667 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
7668#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7669 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
7670 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
7671#endif
7672
7673 Assert(pTbAllocator->cThreadedTbs > 0);
7674 pTbAllocator->cThreadedTbs -= 1;
7675 pTbAllocator->cNativeTbs += 1;
7676 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
7677
7678#ifdef LOG_ENABLED
7679 /*
7680 * Disassemble to the log if enabled.
7681 */
7682 if (LogIs3Enabled())
7683 {
7684 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
7685 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
7686 }
7687#endif
7688
7689 return pTb;
7690}
7691
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette