VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@101568

Last change on this file since 101568 was 101568, checked in by vboxsync, 18 months ago

VMM/IEM: Native IEM_MC_IF_EFL_ANY_BITS_SET translation. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 101568 2023-10-24 00:42:06Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94
95#include "IEMInline.h"
96#include "IEMThreadedFunctions.h"
97#include "IEMN8veRecompiler.h"
98#include "IEMNativeFunctions.h"
99
100
101/*
102 * Narrow down configs here to avoid wasting time on unused configs.
103 * Note! Same checks in IEMAllThrdRecompiler.cpp.
104 */
105
106#ifndef IEM_WITH_CODE_TLB
107# error The code TLB must be enabled for the recompiler.
108#endif
109
110#ifndef IEM_WITH_DATA_TLB
111# error The data TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_SETJMP
115# error The setjmp approach must be enabled for the recompiler.
116#endif
117
118
119/*********************************************************************************************************************************
120* Defined Constants And Macros *
121*********************************************************************************************************************************/
122/** Always count instructions for now. */
123#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
124
125
126/*********************************************************************************************************************************
127* Internal Functions *
128*********************************************************************************************************************************/
129static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
130 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg) RT_NOEXCEPT;
131static bool iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off) RT_NOEXCEPT;
132static bool iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData) RT_NOEXCEPT;
133
134
135/*********************************************************************************************************************************
136* Executable Memory Allocator *
137*********************************************************************************************************************************/
138/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
139 * Use an alternative chunk sub-allocator that does not store internal data
140 * in the chunk.
141 *
142 * Using RTHeapSimple is not practical on newer darwin systems where
143 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
144 * memory. We would have to change the protection of the whole chunk for
145 * every call to RTHeapSimple, which would be rather expensive.
146 *
147 * This alternative implementation lets us restrict page protection modifications
148 * to the pages backing the executable memory we just allocated.
149 */
150#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151/** The chunk sub-allocation unit size in bytes. */
152#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
153/** The chunk sub-allocation unit size as a shift factor. */
154#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
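/* Example (sketch): with 128 byte units a 200 byte request rounds up to two units,
 * mirroring the calculation done in iemExecMemAllocatorAllocInChunk below:
 *      cReqUnits = (200 + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;  // = 2
 *      cbRounded = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;                                       // = 256 bytes
 * (cbRounded is just an illustration name, not used by the code.) */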
155
156#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
157# ifdef IEMNATIVE_USE_GDB_JIT
158# define IEMNATIVE_USE_GDB_JIT_ET_DYN
159
160/** GDB JIT: Code entry. */
161typedef struct GDBJITCODEENTRY
162{
163 struct GDBJITCODEENTRY *pNext;
164 struct GDBJITCODEENTRY *pPrev;
165 uint8_t *pbSymFile;
166 uint64_t cbSymFile;
167} GDBJITCODEENTRY;
168
169/** GDB JIT: Actions. */
170typedef enum GDBJITACTIONS : uint32_t
171{
172 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
173} GDBJITACTIONS;
174
175/** GDB JIT: Descriptor. */
176typedef struct GDBJITDESCRIPTOR
177{
178 uint32_t uVersion;
179 GDBJITACTIONS enmAction;
180 GDBJITCODEENTRY *pRelevant;
181 GDBJITCODEENTRY *pHead;
182 /** Our addition: */
183 GDBJITCODEENTRY *pTail;
184} GDBJITDESCRIPTOR;
185
186/** GDB JIT: Our simple symbol file data. */
187typedef struct GDBJITSYMFILE
188{
189 Elf64_Ehdr EHdr;
190# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
191 Elf64_Shdr aShdrs[5];
192# else
193 Elf64_Shdr aShdrs[7];
194 Elf64_Phdr aPhdrs[2];
195# endif
196 /** The dwarf ehframe data for the chunk. */
197 uint8_t abEhFrame[512];
198 char szzStrTab[128];
199 Elf64_Sym aSymbols[3];
200# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Sym aDynSyms[2];
202 Elf64_Dyn aDyn[6];
203# endif
204} GDBJITSYMFILE;
205
206extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
207extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
208
209/** Init once for g_IemNativeGdbJitLock. */
210static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
211/** Init once for the critical section. */
212static RTCRITSECT g_IemNativeGdbJitLock;
213
214/** GDB reads the info here. */
215GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
216
217/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
218DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
219{
220 ASMNopPause();
221}
222
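/* Sketch of the registration handshake used further down in
 * iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk: link a new GDBJITCODEENTRY
 * into the __jit_debug_descriptor list, point pRelevant at it, set enmAction to
 * kGdbJitaction_Register and call __jit_debug_register_code() so the debugger's
 * breakpoint fires and it picks up the freshly generated symbol file. */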
223/** @callback_method_impl{FNRTONCE} */
224static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
225{
226 RT_NOREF(pvUser);
227 return RTCritSectInit(&g_IemNativeGdbJitLock);
228}
229
230
231# endif /* IEMNATIVE_USE_GDB_JIT */
232
233/**
234 * Per-chunk unwind info for non-windows hosts.
235 */
236typedef struct IEMEXECMEMCHUNKEHFRAME
237{
238# ifdef IEMNATIVE_USE_LIBUNWIND
239 /** The offset of the FDA into abEhFrame. */
240 uintptr_t offFda;
241# else
242 /** 'struct object' storage area. */
243 uint8_t abObject[1024];
244# endif
245# ifdef IEMNATIVE_USE_GDB_JIT
246# if 0
247 /** The GDB JIT 'symbol file' data. */
248 GDBJITSYMFILE GdbJitSymFile;
249# endif
250 /** The GDB JIT list entry. */
251 GDBJITCODEENTRY GdbJitEntry;
252# endif
253 /** The dwarf ehframe data for the chunk. */
254 uint8_t abEhFrame[512];
255} IEMEXECMEMCHUNKEHFRAME;
256/** Pointer to per-chunk info for non-windows hosts. */
257typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
258#endif
259
260
261/**
262 * A chunk of executable memory.
263 */
264typedef struct IEMEXECMEMCHUNK
265{
266#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
267 /** Number of free items in this chunk. */
268 uint32_t cFreeUnits;
269 /** Hint where to start searching for free space in the allocation bitmap. */
270 uint32_t idxFreeHint;
271#else
272 /** The heap handle. */
273 RTHEAPSIMPLE hHeap;
274#endif
275 /** Pointer to the chunk. */
276 void *pvChunk;
277#ifdef IN_RING3
278 /**
279 * Pointer to the unwind information.
280 *
281 * This is used during C++ throw and longjmp (windows and probably most other
282 * platforms). Some debuggers (windbg) make use of it as well.
283 *
284 * Windows: This is allocated from hHeap on windows because (at least for
285 * AMD64) the UNWIND_INFO structure address in the
286 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
287 *
288 * Others: Allocated from the regular heap to avoid unnecessary executable data
289 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
290 void *pvUnwindInfo;
291#elif defined(IN_RING0)
292 /** Allocation handle. */
293 RTR0MEMOBJ hMemObj;
294#endif
295} IEMEXECMEMCHUNK;
296/** Pointer to a memory chunk. */
297typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
298
299
300/**
301 * Executable memory allocator for the native recompiler.
302 */
303typedef struct IEMEXECMEMALLOCATOR
304{
305 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
306 uint32_t uMagic;
307
308 /** The chunk size. */
309 uint32_t cbChunk;
310 /** The maximum number of chunks. */
311 uint32_t cMaxChunks;
312 /** The current number of chunks. */
313 uint32_t cChunks;
314 /** Hint where to start looking for available memory. */
315 uint32_t idxChunkHint;
316 /** Statistics: Current number of allocations. */
317 uint32_t cAllocations;
318
319 /** The total amount of memory available. */
320 uint64_t cbTotal;
321 /** Total amount of free memory. */
322 uint64_t cbFree;
323 /** Total amount of memory allocated. */
324 uint64_t cbAllocated;
325
326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
327 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
328 *
329 * Since the chunk size is a power of two and the minimum chunk size is a lot
330 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
331 * require a whole number of uint64_t elements in the allocation bitmap. So,
332 * for the sake of simplicity/laziness, they are allocated as one continuous
333 * chunk. */
334 uint64_t *pbmAlloc;
335 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
336 uint32_t cUnitsPerChunk;
337 /** Number of bitmap elements per chunk (for quickly locating the bitmap
338 * portion corresponding to a chunk). */
339 uint32_t cBitmapElementsPerChunk;
340#else
341 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
342 * @{ */
343 /** The size of the heap internal block header. This is used to adjust the
344 * requested memory size to make sure there is exactly enough room for a header at
345 * the end of the blocks we allocate before the next 64 byte alignment line. */
346 uint32_t cbHeapBlockHdr;
347 /** The size of the initial heap allocation required to make sure the first
348 * allocation is correctly aligned. */
349 uint32_t cbHeapAlignTweak;
350 /** The alignment tweak allocation address. */
351 void *pvAlignTweak;
352 /** @} */
353#endif
354
355#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
356 /** Pointer to the array of unwind info running parallel to aChunks (same
357 * allocation as this structure, located after the bitmaps).
358 * (For Windows, the structures must reside in 32-bit RVA distance to the
359 * actual chunk, so they are allocated off the chunk.) */
360 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
361#endif
362
363 /** The allocation chunks. */
364 RT_FLEXIBLE_ARRAY_EXTENSION
365 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
366} IEMEXECMEMALLOCATOR;
367/** Pointer to an executable memory allocator. */
368typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
369
370/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
371#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
372
373
374static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
375
376
377/**
378 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
379 * the heap statistics.
380 */
381static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
382 uint32_t cbReq, uint32_t idxChunk)
383{
384 pExecMemAllocator->cAllocations += 1;
385 pExecMemAllocator->cbAllocated += cbReq;
386#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
387 pExecMemAllocator->cbFree -= cbReq;
388#else
389 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
390#endif
391 pExecMemAllocator->idxChunkHint = idxChunk;
392
393#ifdef RT_OS_DARWIN
394 /*
395 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
396 * on darwin. So, we mark the pages returned as read+write after alloc and
397 * expect the caller to call iemExecMemAllocatorReadyForUse when done
398 * writing to the allocation.
399 *
400 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
401 * for details.
402 */
403 /** @todo detect if this is necessary... it wasn't required on 10.15 or
404 * whatever older version it was. */
405 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
406 AssertRC(rc);
407#endif
408
409 return pvRet;
410}
411
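/* Sketch of the write/execute protocol assumed on darwin (where RTMEM_PROT_WRITE
 * and RTMEM_PROT_EXEC are mutually exclusive):
 *      void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode);    // pages are RW on return
 *      memcpy(pv, pbNativeCode, cbCode);                      // emit the recompiled code
 *      iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);     // flip to RX + icache flush
 * (pbNativeCode/cbCode are placeholder names; on other hosts the protection calls
 * are no-ops.) */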
412
413#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
414static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
415 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
416{
417 /*
418 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
419 */
420 Assert(!(cToScan & 63));
421 Assert(!(idxFirst & 63));
422 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
423 pbmAlloc += idxFirst / 64;
424
425 /*
426 * Scan the bitmap for cReqUnits of consecutive clear bits.
427 */
428 /** @todo This can probably be done more efficiently for non-x86 systems. */
429 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
430 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
431 {
432 uint32_t idxAddBit = 1;
433 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
434 idxAddBit++;
435 if (idxAddBit >= cReqUnits)
436 {
437 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
438
439 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
440 pChunk->cFreeUnits -= cReqUnits;
441 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
442
443 void * const pvRet = (uint8_t *)pChunk->pvChunk
444 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
445
446 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
447 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
448 }
449
450 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
451 }
452 return NULL;
453}
454#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
455
456
457static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
458{
459#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
460 /*
461 * Figure out how much to allocate.
462 */
463 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
464 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
465 {
466 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
467 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
468 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
469 {
470 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
471 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
472 if (pvRet)
473 return pvRet;
474 }
475 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
476 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
477 cReqUnits, idxChunk);
478 }
479#else
480 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
481 if (pvRet)
482 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
483#endif
484 return NULL;
485
486}
487
488
489/**
490 * Allocates @a cbReq bytes of executable memory.
491 *
492 * @returns Pointer to the memory, NULL if out of memory or other problem
493 * encountered.
494 * @param pVCpu The cross context virtual CPU structure of the calling
495 * thread.
496 * @param cbReq How many bytes are required.
497 */
498static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
499{
500 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
501 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
502 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
503
504 /*
505 * Adjust the request size so it'll fit the allocator alignment/whatnot.
506 *
507 * For the RTHeapSimple allocator this means to follow the logic described
508 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
509 * existing chunks if we think we've got sufficient free memory around.
510 *
511 * While for the alternative one we just align it up to a whole unit size.
512 */
513#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
514 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
515#else
516 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
517#endif
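    /* Worked example (sketch): a 200 byte request becomes 256 bytes with the
     * alternative sub-allocator (two 128 byte units), while with RTHeapSimple
     * and its assumed 32 byte block header (64-bit hosts) it becomes
     * RT_ALIGN_32(200 + 32, 64) - 32 = 224, so the block plus the next header
     * ends exactly on a 64 byte line. */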
518 if (cbReq <= pExecMemAllocator->cbFree)
519 {
520 uint32_t const cChunks = pExecMemAllocator->cChunks;
521 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
522 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
523 {
524 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
525 if (pvRet)
526 return pvRet;
527 }
528 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
529 {
530 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
531 if (pvRet)
532 return pvRet;
533 }
534 }
535
536 /*
537 * Can we grow it with another chunk?
538 */
539 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
540 {
541 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
542 AssertLogRelRCReturn(rc, NULL);
543
544 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 AssertFailed();
549 }
550
551 /* What now? Prune native translation blocks from the cache? */
552 AssertFailed();
553 return NULL;
554}
555
556
557/** This is a hook that we may need later for changing memory protection back
558 * to readonly+exec */
559static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
560{
561#ifdef RT_OS_DARWIN
562 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
563 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
564 AssertRC(rc); RT_NOREF(pVCpu);
565
566 /*
567 * Flush the instruction cache:
568 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
569 */
570 /* sys_dcache_flush(pv, cb); - not necessary */
571 sys_icache_invalidate(pv, cb);
572#else
573 RT_NOREF(pVCpu, pv, cb);
574#endif
575}
576
577
578/**
579 * Frees executable memory.
580 */
581void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
582{
583 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
584 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
585 Assert(pv);
586#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
587 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
588#else
589 Assert(!((uintptr_t)pv & 63));
590#endif
591
592 /* Align the size as we did when allocating the block. */
593#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
594 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
595#else
596 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
597#endif
598
599 /* Free it / assert sanity. */
600#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
601 uint32_t const cChunks = pExecMemAllocator->cChunks;
602 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
603 bool fFound = false;
604 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
605 {
606 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
607 fFound = offChunk < cbChunk;
608 if (fFound)
609 {
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 uint32_t const idxFirst = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
612 uint32_t const cReqUnits = cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
613
614 /* Check that it's valid and free it. */
615 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
616 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
617 for (uint32_t i = 1; i < cReqUnits; i++)
618 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
619 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
620
621 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
622 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
623
624 /* Update the stats. */
625 pExecMemAllocator->cbAllocated -= cb;
626 pExecMemAllocator->cbFree += cb;
627 pExecMemAllocator->cAllocations -= 1;
628 return;
629#else
630 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
631 break;
632#endif
633 }
634 }
635# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
636 AssertFailed();
637# else
638 Assert(fFound);
639# endif
640#endif
641
642#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
643 /* Update stats while cb is freshly calculated. */
644 pExecMemAllocator->cbAllocated -= cb;
645 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
646 pExecMemAllocator->cAllocations -= 1;
647
648 /* Free it. */
649 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
650#endif
651}
652
653
654
655#ifdef IN_RING3
656# ifdef RT_OS_WINDOWS
657
658/**
659 * Initializes the unwind info structures for windows hosts.
660 */
661static int
662iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
663 void *pvChunk, uint32_t idxChunk)
664{
665 RT_NOREF(pVCpu);
666
667 /*
668 * The AMD64 unwind opcodes.
669 *
670 * This is a program that starts with RSP after a RET instruction that
671 * ends up in recompiled code, and the operations we describe here will
672 * restore all non-volatile registers and bring RSP back to where our
673 * RET address is. This means it's reverse order from what happens in
674 * the prologue.
675 *
676 * Note! Using a frame register approach here both because we have one
677 * and mainly because the UWOP_ALLOC_LARGE argument values
678 * would be a pain to write initializers for. On the positive
679 * side, we're impervious to changes in the stack variable
680 * area and can deal with dynamic stack allocations if necessary.
681 */
682 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
683 {
684 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 16 (0x60) */
685 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
686 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
687 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
688 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
689 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
690 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
691 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
692 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
693 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
694 };
695 union
696 {
697 IMAGE_UNWIND_INFO Info;
698 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
699 } s_UnwindInfo =
700 {
701 {
702 /* .Version = */ 1,
703 /* .Flags = */ 0,
704 /* .SizeOfProlog = */ 16, /* whatever */
705 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
706 /* .FrameRegister = */ X86_GREG_xBP,
707 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
708 }
709 };
710 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
711 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
712
713 /*
714 * Calc how much space we need and allocate it off the exec heap.
715 */
716 unsigned const cFunctionEntries = 1;
717 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
718 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
719# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
720 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
721 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
722 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
723# else
724 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
725 - pExecMemAllocator->cbHeapBlockHdr;
726 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
727 32 /*cbAlignment*/);
728# endif
729 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
730 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
731
732 /*
733 * Initialize the structures.
734 */
735 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
736
737 paFunctions[0].BeginAddress = 0;
738 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
739 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
740
741 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
742 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
743
744 /*
745 * Register it.
746 */
747 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
748 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
749
750 return VINF_SUCCESS;
751}
752
753
754# else /* !RT_OS_WINDOWS */
755
756/**
757 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
758 */
759DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
760{
761 if (iValue >= 64)
762 {
763 Assert(iValue < 0x2000);
764 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
765 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
766 }
767 else if (iValue >= 0)
768 *Ptr.pb++ = (uint8_t)iValue;
769 else if (iValue > -64)
770 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
771 else
772 {
773 Assert(iValue > -0x2000);
774 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
775 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
776 }
777 return Ptr;
778}
779
780
781/**
782 * Emits an ULEB128 encoded value (up to 64-bit wide).
783 */
784DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
785{
786 while (uValue >= 0x80)
787 {
788 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
789 uValue >>= 7;
790 }
791 *Ptr.pb++ = (uint8_t)uValue;
792 return Ptr;
793}
794
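/* Example encodings (sketch): iemDwarfPutUleb128(Ptr, 0x90) emits the two bytes
 * 0x90 0x01 (seven low bits with the continuation bit set, then the remaining
 * bits), while iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78 - which is
 * exactly how the data alignment factor of -8 is encoded in the CIE below. */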
795
796/**
797 * Emits a CFA rule as register @a uReg + offset @a off.
798 */
799DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
800{
801 *Ptr.pb++ = DW_CFA_def_cfa;
802 Ptr = iemDwarfPutUleb128(Ptr, uReg);
803 Ptr = iemDwarfPutUleb128(Ptr, off);
804 return Ptr;
805}
806
807
808/**
809 * Emits a register (@a uReg) save location:
810 * CFA + @a off * data_alignment_factor
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
813{
814 if (uReg < 0x40)
815 *Ptr.pb++ = DW_CFA_offset | uReg;
816 else
817 {
818 *Ptr.pb++ = DW_CFA_offset_extended;
819 Ptr = iemDwarfPutUleb128(Ptr, uReg);
820 }
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826# if 0 /* unused */
827/**
828 * Emits a register (@a uReg) save location, using signed offset:
829 * CFA + @a offSigned * data_alignment_factor
830 */
831DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
832{
833 *Ptr.pb++ = DW_CFA_offset_extended_sf;
834 Ptr = iemDwarfPutUleb128(Ptr, uReg);
835 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
836 return Ptr;
837}
838# endif
839
840
841/**
842 * Initializes the unwind info section for non-windows hosts.
843 */
844static int
845iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
846 void *pvChunk, uint32_t idxChunk)
847{
848 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
849 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
850
851 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
852
853 /*
854 * Generate the CIE first.
855 */
856# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
857 uint8_t const iDwarfVer = 3;
858# else
859 uint8_t const iDwarfVer = 4;
860# endif
861 RTPTRUNION const PtrCie = Ptr;
862 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
863 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
864 *Ptr.pb++ = iDwarfVer; /* DWARF version */
865 *Ptr.pb++ = 0; /* Augmentation. */
866 if (iDwarfVer >= 4)
867 {
868 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
869 *Ptr.pb++ = 0; /* Segment selector size. */
870 }
871# ifdef RT_ARCH_AMD64
872 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
873# else
874 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
875# endif
876 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
877# ifdef RT_ARCH_AMD64
878 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
879# elif defined(RT_ARCH_ARM64)
880 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
881# else
882# error "port me"
883# endif
884 /* Initial instructions: */
885# ifdef RT_ARCH_AMD64
886 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
887 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
888 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
889 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
890 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
891 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
892 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
893 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
894# elif defined(RT_ARCH_ARM64)
895# if 1
896 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
897# else
898 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
899# endif
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
912 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
913 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
914# else
915# error "port me"
916# endif
917 while ((Ptr.u - PtrCie.u) & 3)
918 *Ptr.pb++ = DW_CFA_nop;
919 /* Finalize the CIE size. */
920 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
921
922 /*
923 * Generate an FDE for the whole chunk area.
924 */
925# ifdef IEMNATIVE_USE_LIBUNWIND
926 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
927# endif
928 RTPTRUNION const PtrFde = Ptr;
929 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
930 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
931 Ptr.pu32++;
932 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
933 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
934# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
935 *Ptr.pb++ = DW_CFA_nop;
936# endif
937 while ((Ptr.u - PtrFde.u) & 3)
938 *Ptr.pb++ = DW_CFA_nop;
939 /* Finalize the FDE size. */
940 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
941
942 /* Terminator entry. */
943 *Ptr.pu32++ = 0;
944 *Ptr.pu32++ = 0; /* just to be sure... */
945 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
946
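    /* Resulting abEhFrame layout (sketch): [CIE: length, id 0, version, factors,
     * return column, initial CFI instructions, nop padding] followed by
     * [FDE: length, reference back to the CIE, chunk start address, chunk size,
     * nop padding] and a zero terminator entry. */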
947 /*
948 * Register it.
949 */
950# ifdef IEMNATIVE_USE_LIBUNWIND
951 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
952# else
953 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
954 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
955# endif
956
957# ifdef IEMNATIVE_USE_GDB_JIT
958 /*
959 * Now for telling GDB about this (experimental).
960 *
961 * This seems to work best with ET_DYN.
962 */
963 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
964# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
965 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
966 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
967# else
968 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
969 - pExecMemAllocator->cbHeapBlockHdr;
970 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
971# endif
972 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
973 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
974
975 RT_ZERO(*pSymFile);
976
977 /*
978 * The ELF header:
979 */
980 pSymFile->EHdr.e_ident[0] = ELFMAG0;
981 pSymFile->EHdr.e_ident[1] = ELFMAG1;
982 pSymFile->EHdr.e_ident[2] = ELFMAG2;
983 pSymFile->EHdr.e_ident[3] = ELFMAG3;
984 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
985 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
986 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
987 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
988# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
989 pSymFile->EHdr.e_type = ET_DYN;
990# else
991 pSymFile->EHdr.e_type = ET_REL;
992# endif
993# ifdef RT_ARCH_AMD64
994 pSymFile->EHdr.e_machine = EM_AMD64;
995# elif defined(RT_ARCH_ARM64)
996 pSymFile->EHdr.e_machine = EM_AARCH64;
997# else
998# error "port me"
999# endif
1000 pSymFile->EHdr.e_version = 1; /*?*/
1001 pSymFile->EHdr.e_entry = 0;
1002# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1003 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1004# else
1005 pSymFile->EHdr.e_phoff = 0;
1006# endif
1007 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1008 pSymFile->EHdr.e_flags = 0;
1009 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1010# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1011 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1012 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1013# else
1014 pSymFile->EHdr.e_phentsize = 0;
1015 pSymFile->EHdr.e_phnum = 0;
1016# endif
1017 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1018 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1019 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1020
1021 uint32_t offStrTab = 0;
1022#define APPEND_STR(a_szStr) do { \
1023 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1024 offStrTab += sizeof(a_szStr); \
1025 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1026 } while (0)
1027#define APPEND_STR_FMT(a_szStr, ...) do { \
1028 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1029 offStrTab++; \
1030 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1031 } while (0)
1032
1033 /*
1034 * Section headers.
1035 */
1036 /* Section header #0: NULL */
1037 unsigned i = 0;
1038 APPEND_STR("");
1039 RT_ZERO(pSymFile->aShdrs[i]);
1040 i++;
1041
1042 /* Section header: .eh_frame */
1043 pSymFile->aShdrs[i].sh_name = offStrTab;
1044 APPEND_STR(".eh_frame");
1045 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1046 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1047# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1048 pSymFile->aShdrs[i].sh_offset
1049 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1050# else
1051 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1052 pSymFile->aShdrs[i].sh_offset = 0;
1053# endif
1054
1055 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1056 pSymFile->aShdrs[i].sh_link = 0;
1057 pSymFile->aShdrs[i].sh_info = 0;
1058 pSymFile->aShdrs[i].sh_addralign = 1;
1059 pSymFile->aShdrs[i].sh_entsize = 0;
1060 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1061 i++;
1062
1063 /* Section header: .shstrtab */
1064 unsigned const iShStrTab = i;
1065 pSymFile->EHdr.e_shstrndx = iShStrTab;
1066 pSymFile->aShdrs[i].sh_name = offStrTab;
1067 APPEND_STR(".shstrtab");
1068 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1069 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1070# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1071 pSymFile->aShdrs[i].sh_offset
1072 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1073# else
1074 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1075 pSymFile->aShdrs[i].sh_offset = 0;
1076# endif
1077 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1078 pSymFile->aShdrs[i].sh_link = 0;
1079 pSymFile->aShdrs[i].sh_info = 0;
1080 pSymFile->aShdrs[i].sh_addralign = 1;
1081 pSymFile->aShdrs[i].sh_entsize = 0;
1082 i++;
1083
1084 /* Section header: .symtab */
1085 pSymFile->aShdrs[i].sh_name = offStrTab;
1086 APPEND_STR(".symtab");
1087 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1088 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1091 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1092 pSymFile->aShdrs[i].sh_link = iShStrTab;
1093 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1094 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1095 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1096 i++;
1097
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1099 /* Section header: .dynsym */
1100 pSymFile->aShdrs[i].sh_name = offStrTab;
1101 APPEND_STR(".dynsym");
1102 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1103 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1104 pSymFile->aShdrs[i].sh_offset
1105 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1106 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1107 pSymFile->aShdrs[i].sh_link = iShStrTab;
1108 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1109 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1110 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1111 i++;
1112# endif
1113
1114# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1115 /* Section header: .dynamic */
1116 pSymFile->aShdrs[i].sh_name = offStrTab;
1117 APPEND_STR(".dynamic");
1118 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1119 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1120 pSymFile->aShdrs[i].sh_offset
1121 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1122 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1123 pSymFile->aShdrs[i].sh_link = iShStrTab;
1124 pSymFile->aShdrs[i].sh_info = 0;
1125 pSymFile->aShdrs[i].sh_addralign = 1;
1126 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1127 i++;
1128# endif
1129
1130 /* Section header: .text */
1131 unsigned const iShText = i;
1132 pSymFile->aShdrs[i].sh_name = offStrTab;
1133 APPEND_STR(".text");
1134 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1135 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1136# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1137 pSymFile->aShdrs[i].sh_offset
1138 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1139# else
1140 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1141 pSymFile->aShdrs[i].sh_offset = 0;
1142# endif
1143 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1144 pSymFile->aShdrs[i].sh_link = 0;
1145 pSymFile->aShdrs[i].sh_info = 0;
1146 pSymFile->aShdrs[i].sh_addralign = 1;
1147 pSymFile->aShdrs[i].sh_entsize = 0;
1148 i++;
1149
1150 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1151
1152# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1153 /*
1154 * The program headers:
1155 */
1156 /* Everything in a single LOAD segment: */
1157 i = 0;
1158 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1159 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1160 pSymFile->aPhdrs[i].p_offset
1161 = pSymFile->aPhdrs[i].p_vaddr
1162 = pSymFile->aPhdrs[i].p_paddr = 0;
1163 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1164 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1165 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1166 i++;
1167 /* The .dynamic segment. */
1168 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1169 pSymFile->aPhdrs[i].p_flags = PF_R;
1170 pSymFile->aPhdrs[i].p_offset
1171 = pSymFile->aPhdrs[i].p_vaddr
1172 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1173 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1174 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1175 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1179
1180 /*
1181 * The dynamic section:
1182 */
1183 i = 0;
1184 pSymFile->aDyn[i].d_tag = DT_SONAME;
1185 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1186 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1187 i++;
1188 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1189 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1190 i++;
1191 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1192 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1193 i++;
1194 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1195 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1196 i++;
1197 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1198 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1199 i++;
1200 pSymFile->aDyn[i].d_tag = DT_NULL;
1201 i++;
1202 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1203# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1204
1205 /*
1206 * Symbol tables:
1207 */
1208 /** @todo gdb doesn't seem to really like this ... */
1209 i = 0;
1210 pSymFile->aSymbols[i].st_name = 0;
1211 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1212 pSymFile->aSymbols[i].st_value = 0;
1213 pSymFile->aSymbols[i].st_size = 0;
1214 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1215 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1217 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1218# endif
1219 i++;
1220
1221 pSymFile->aSymbols[i].st_name = 0;
1222 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1223 pSymFile->aSymbols[i].st_value = 0;
1224 pSymFile->aSymbols[i].st_size = 0;
1225 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1226 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1227 i++;
1228
1229 pSymFile->aSymbols[i].st_name = offStrTab;
1230 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1231# if 0
1232 pSymFile->aSymbols[i].st_shndx = iShText;
1233 pSymFile->aSymbols[i].st_value = 0;
1234# else
1235 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1236 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1237# endif
1238 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1239 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1240 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1241# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1242 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1243 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1244# endif
1245 i++;
1246
1247 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1248 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1249
1250 /*
1251 * The GDB JIT entry and informing GDB.
1252 */
1253 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1254# if 1
1255 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1256# else
1257 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1258# endif
1259
1260 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1261 RTCritSectEnter(&g_IemNativeGdbJitLock);
1262 pEhFrame->GdbJitEntry.pNext = NULL;
1263 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1264 if (__jit_debug_descriptor.pTail)
1265 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1266 else
1267 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1268 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1269 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1270
1271 /* Notify GDB: */
1272 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1273 __jit_debug_register_code();
1274 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1275 RTCritSectLeave(&g_IemNativeGdbJitLock);
1276
1277# else /* !IEMNATIVE_USE_GDB_JIT */
1278 RT_NOREF(pVCpu);
1279# endif /* !IEMNATIVE_USE_GDB_JIT */
1280
1281 return VINF_SUCCESS;
1282}
1283
1284# endif /* !RT_OS_WINDOWS */
1285#endif /* IN_RING3 */
1286
1287
1288/**
1289 * Adds another chunk to the executable memory allocator.
1290 *
1291 * This is used by the init code for the initial allocation and later by the
1292 * regular allocator function when it's out of memory.
1293 */
1294static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1295{
1296 /* Check that we've room for growth. */
1297 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1298 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1299
1300 /* Allocate a chunk. */
1301#ifdef RT_OS_DARWIN
1302 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1303#else
1304 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1305#endif
1306 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1307
1308#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1309 int rc = VINF_SUCCESS;
1310#else
1311 /* Initialize the heap for the chunk. */
1312 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1313 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1314 AssertRC(rc);
1315 if (RT_SUCCESS(rc))
1316 {
1317 /*
1318 * We want the memory to be aligned on 64 byte, so the first time thru
1319 * here we do some exploratory allocations to see how we can achieve this.
1320 * On subsequent runs we only make an initial adjustment allocation, if
1321 * necessary.
1322 *
1323 * Since we own the heap implementation, we know that the internal block
1324 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1325 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1326 * to the size, align up by 64 bytes, and subtract 32 bytes.
1327 *
1328 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1329 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1330 * allocation to force subsequent allocations to return 64 byte aligned
1331 * user areas.
1332 */
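        /* Example (sketch): the 256 byte test allocation below is requested as
         * RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes, so the 288 byte user area
         * plus the 32 byte header of the following block ends exactly on a 64
         * byte line, keeping the next user area 64 byte aligned. */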
1333 if (!pExecMemAllocator->cbHeapBlockHdr)
1334 {
1335 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1336 pExecMemAllocator->cbHeapAlignTweak = 64;
1337 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1338 32 /*cbAlignment*/);
1339 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1340
1341 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1342 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1343 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1344 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1345 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1346
1347 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1348 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1349 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1350 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1351 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1352
1353 RTHeapSimpleFree(hHeap, pvTest2);
1354 RTHeapSimpleFree(hHeap, pvTest1);
1355 }
1356 else
1357 {
1358 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1359 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1360 }
1361 if (RT_SUCCESS(rc))
1362#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1363 {
1364 /*
1365 * Add the chunk.
1366 *
1367 * This must be done before the unwind init so windows can allocate
1368 * memory from the chunk when using the alternative sub-allocator.
1369 */
1370 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1371#ifdef IN_RING3
1372 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1373#endif
1374#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1375 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1376#else
1377 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1378 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1379 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1380 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1381#endif
1382
1383 pExecMemAllocator->cChunks = idxChunk + 1;
1384 pExecMemAllocator->idxChunkHint = idxChunk;
1385
1386#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1387 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1388 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1389#else
1390 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1391 pExecMemAllocator->cbTotal += cbFree;
1392 pExecMemAllocator->cbFree += cbFree;
1393#endif
1394
1395#ifdef IN_RING3
1396 /*
1397 * Initialize the unwind information (this cannot really fail atm).
1398 * (This sets pvUnwindInfo.)
1399 */
1400 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1401 if (RT_SUCCESS(rc))
1402#endif
1403 {
1404 return VINF_SUCCESS;
1405 }
1406
1407#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1408 /* Just in case the impossible happens, undo the above: */
1409 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1410 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1411 pExecMemAllocator->cChunks = idxChunk;
1412 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1413 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1414 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1415 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1416#endif
1417 }
1418#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1419 }
1420#endif
1421 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1422 RT_NOREF(pVCpu);
1423 return rc;
1424}
1425
1426
1427/**
1428 * Initializes the executable memory allocator for native recompilation on the
1429 * calling EMT.
1430 *
1431 * @returns VBox status code.
1432 * @param pVCpu The cross context virtual CPU structure of the calling
1433 * thread.
1434 * @param cbMax The max size of the allocator.
1435 * @param cbInitial The initial allocator size.
1436 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1437 * dependent).
1438 */
1439int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1440{
1441 /*
1442 * Validate input.
1443 */
1444 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1445 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1446 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1447 || cbChunk == 0
1448 || ( RT_IS_POWER_OF_TWO(cbChunk)
1449 && cbChunk >= _1M
1450 && cbChunk <= _256M
1451 && cbChunk <= cbMax),
1452 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1453 VERR_OUT_OF_RANGE);
1454
1455 /*
1456 * Adjust/figure out the chunk size.
1457 */
1458 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1459 {
1460 if (cbMax >= _256M)
1461 cbChunk = _64M;
1462 else
1463 {
1464 if (cbMax < _16M)
1465 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1466 else
1467 cbChunk = (uint32_t)cbMax / 4;
1468 if (!RT_IS_POWER_OF_TWO(cbChunk))
1469 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1470 }
1471 }
1472
1473 if (cbChunk > cbMax)
1474 cbMax = cbChunk;
1475 else
1476 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1477 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1478 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
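    /* Example (sketch): cbMax = 64M yields cbChunk = 64M / 4 = 16M and thus
     * cMaxChunks = 4; cbMax = 512M takes the first branch and yields the
     * default 64M chunks, i.e. cMaxChunks = 8. */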
1479
1480 /*
1481 * Allocate and initialize the allocator instance.
1482 */
1483 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1484#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1485 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1486 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1487 cbNeeded += cbBitmap * cMaxChunks;
1488 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1489 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1490#endif
1491#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1492 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1493 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1494#endif
1495 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1496 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1497 VERR_NO_MEMORY);
1498 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1499 pExecMemAllocator->cbChunk = cbChunk;
1500 pExecMemAllocator->cMaxChunks = cMaxChunks;
1501 pExecMemAllocator->cChunks = 0;
1502 pExecMemAllocator->idxChunkHint = 0;
1503 pExecMemAllocator->cAllocations = 0;
1504 pExecMemAllocator->cbTotal = 0;
1505 pExecMemAllocator->cbFree = 0;
1506 pExecMemAllocator->cbAllocated = 0;
1507#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1508 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1509 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1510 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1511 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1512#endif
1513#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1514 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1515#endif
1516 for (uint32_t i = 0; i < cMaxChunks; i++)
1517 {
1518#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1519 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1520 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1521#else
1522 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1523#endif
1524 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1525#ifdef IN_RING0
1526 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1527#else
1528 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1529#endif
1530 }
1531 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1532
1533 /*
1534 * Do the initial allocations.
1535 */
1536    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1537 {
1538 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1539 AssertLogRelRCReturn(rc, rc);
1540 }
1541
1542 pExecMemAllocator->idxChunkHint = 0;
1543
1544 return VINF_SUCCESS;
1545}
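
/*
 * Illustrative usage sketch (not compiled, not called anywhere): a plausible
 * ring-3 initialization call that passes 0 for cbChunk so the code above
 * picks the chunk size.  The 512 MB / 64 MB figures are made up for the
 * example; they are not defaults defined in this file.
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, 512 * _1M, 64 * _1M, 0);
 *      AssertLogRelRCReturn(rc, rc);
 */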
1546
1547
1548/*********************************************************************************************************************************
1549* Native Recompilation *
1550*********************************************************************************************************************************/
1551
1552
1553/**
1554 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1555 */
1556IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1557{
1558 pVCpu->iem.s.cInstructions += idxInstr;
1559 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1560}
1561
1562
1563/**
1564 * Used by TB code when it wants to raise a \#GP(0).
1565 */
1566IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1567{
1568 pVCpu->iem.s.cInstructions += idxInstr;
1569 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1570 return VINF_IEM_RAISED_XCPT; /* not reached */
1571}
1572
1573
1574/**
1575 * Reinitializes the native recompiler state.
1576 *
1577 * Called before starting a new recompile job.
1578 */
1579static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1580{
1581 pReNative->cLabels = 0;
1582 pReNative->bmLabelTypes = 0;
1583 pReNative->cFixups = 0;
1584#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1585 pReNative->pDbgInfo->cEntries = 0;
1586#endif
1587 pReNative->pTbOrg = pTb;
1588 pReNative->cCondDepth = 0;
1589 pReNative->uCondSeqNo = 0;
1590
1591 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1592#if IEMNATIVE_HST_GREG_COUNT < 32
1593 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1594#endif
1595 ;
1596 pReNative->Core.bmHstRegsWithGstShadow = 0;
1597 pReNative->Core.bmGstRegShadows = 0;
1598 pReNative->Core.bmVars = 0;
1599 pReNative->Core.u64ArgVars = UINT64_MAX;
1600
1601 /* Full host register reinit: */
1602 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1603 {
1604 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1605 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1606 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1607 }
1608
1609 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1610 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1611#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1612 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1613#endif
1614#ifdef IEMNATIVE_REG_FIXED_TMP0
1615 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1616#endif
1617 );
1618 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1619 {
1620 fRegs &= ~RT_BIT_32(idxReg);
1621        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1622 }
1623
1624 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1625#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1626 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1627#endif
1628#ifdef IEMNATIVE_REG_FIXED_TMP0
1629 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1630#endif
1631 return pReNative;
1632}
1633
1634
1635/**
1636 * Allocates and initializes the native recompiler state.
1637 *
1638 * This is called the first time an EMT wants to recompile something.
1639 *
1640 * @returns Pointer to the new recompiler state.
1641 * @param pVCpu The cross context virtual CPU structure of the calling
1642 * thread.
1643 * @param pTb The TB that's about to be recompiled.
1644 * @thread EMT(pVCpu)
1645 */
1646static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1647{
1648 VMCPU_ASSERT_EMT(pVCpu);
1649
1650 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1651 AssertReturn(pReNative, NULL);
1652
1653 /*
1654 * Try allocate all the buffers and stuff we need.
1655 */
1656 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1657 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1658 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1659#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1660 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1661#endif
1662 if (RT_LIKELY( pReNative->pInstrBuf
1663 && pReNative->paLabels
1664 && pReNative->paFixups)
1665#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1666 && pReNative->pDbgInfo
1667#endif
1668 )
1669 {
1670 /*
1671 * Set the buffer & array sizes on success.
1672 */
1673 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1674 pReNative->cLabelsAlloc = _8K;
1675 pReNative->cFixupsAlloc = _16K;
1676#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1677 pReNative->cDbgInfoAlloc = _16K;
1678#endif
1679
1680 /*
1681 * Done, just need to save it and reinit it.
1682 */
1683 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1684 return iemNativeReInit(pReNative, pTb);
1685 }
1686
1687 /*
1688 * Failed. Cleanup and return.
1689 */
1690 AssertFailed();
1691 RTMemFree(pReNative->pInstrBuf);
1692 RTMemFree(pReNative->paLabels);
1693 RTMemFree(pReNative->paFixups);
1694#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1695 RTMemFree(pReNative->pDbgInfo);
1696#endif
1697 RTMemFree(pReNative);
1698 return NULL;
1699}
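
/*
 * Illustrative sketch (not compiled): how a recompile entry point might obtain
 * the state, allocating it on first use and reinitializing it afterwards.  The
 * surrounding function and its pTb return value are hypothetical; only
 * iemNativeInit() and iemNativeReInit() above are real.
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (pReNative)
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *          pReNative = iemNativeInit(pVCpu, pTb);
 *      AssertReturn(pReNative, pTb);
 */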
1700
1701
1702/**
1703 * Creates a label
1704 *
1705 * If the label does not yet have a defined position,
1706 * call iemNativeLabelDefine() later to set it.
1707 *
1708 * @returns Label ID.
1709 * @param pReNative The native recompile state.
1710 * @param enmType The label type.
1711 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1712 * label is not yet defined (default).
1713 * @param   uData       Data associated with the label. Only applicable to
1714 *                      certain types of labels. Default is zero.
1715 */
1716DECLHIDDEN(uint32_t) iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1717 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
1718{
1719 /*
1720 * Locate existing label definition.
1721 *
1722 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1723 * and uData is zero.
1724 */
1725 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1726 uint32_t const cLabels = pReNative->cLabels;
1727 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1728#ifndef VBOX_STRICT
1729 && offWhere == UINT32_MAX
1730 && uData == 0
1731#endif
1732 )
1733 {
1734 /** @todo Since this is only used for labels with uData = 0, just use a
1735 * lookup array? */
1736 for (uint32_t i = 0; i < cLabels; i++)
1737 if ( paLabels[i].enmType == enmType
1738 && paLabels[i].uData == uData)
1739 {
1740#ifdef VBOX_STRICT
1741 AssertReturn(uData == 0, UINT32_MAX);
1742 AssertReturn(offWhere == UINT32_MAX, UINT32_MAX);
1743#endif
1744 AssertReturn(paLabels[i].off == UINT32_MAX, UINT32_MAX);
1745 return i;
1746 }
1747 }
1748
1749 /*
1750 * Make sure we've got room for another label.
1751 */
1752 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1753 { /* likely */ }
1754 else
1755 {
1756 uint32_t cNew = pReNative->cLabelsAlloc;
1757 AssertReturn(cNew, UINT32_MAX);
1758 AssertReturn(cLabels == cNew, UINT32_MAX);
1759 cNew *= 2;
1760        AssertReturn(cNew <= _64K, UINT32_MAX); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1761 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1762 AssertReturn(paLabels, UINT32_MAX);
1763 pReNative->paLabels = paLabels;
1764 pReNative->cLabelsAlloc = cNew;
1765 }
1766
1767 /*
1768 * Define a new label.
1769 */
1770 paLabels[cLabels].off = offWhere;
1771 paLabels[cLabels].enmType = enmType;
1772 paLabels[cLabels].uData = uData;
1773 pReNative->cLabels = cLabels + 1;
1774
1775 Assert(enmType >= 0 && enmType < 64);
1776 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1777
1778 if (offWhere != UINT32_MAX)
1779 {
1780#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1781 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1782 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1783#endif
1784 }
1785 return cLabels;
1786}
1787
1788
1789/**
1790 * Defines the location of an existing label.
1791 *
1792 * @param pReNative The native recompile state.
1793 * @param idxLabel The label to define.
1794 * @param offWhere The position.
1795 */
1796DECLHIDDEN(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere) RT_NOEXCEPT
1797{
1798 AssertReturnVoid(idxLabel < pReNative->cLabels);
1799 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1800 AssertReturnVoid(pLabel->off == UINT32_MAX);
1801 pLabel->off = offWhere;
1802#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1803 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1804 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1805#endif
1806}
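
/*
 * Illustrative sketch (not compiled) of the forward-label pattern served by
 * iemNativeLabelCreate() and iemNativeLabelDefine().  The label type
 * kIemNativeLabelType_Return is assumed to be declared in the accompanying
 * header; it is only named here for the example.
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *      AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
 *      ... emit a branch to the label, recording a fixup (see iemNativeAddFixup below) ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */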
1807
1808
1809/**
1810 * Looks up a label.
1811 *
1812 * @returns Label ID if found, UINT32_MAX if not.
1813 */
1814static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1815 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1816{
1817 Assert(enmType >= 0 && enmType < 64);
1818 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1819 {
1820 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1821 uint32_t const cLabels = pReNative->cLabels;
1822 for (uint32_t i = 0; i < cLabels; i++)
1823 if ( paLabels[i].enmType == enmType
1824 && paLabels[i].uData == uData
1825 && ( paLabels[i].off == offWhere
1826 || offWhere == UINT32_MAX
1827 || paLabels[i].off == UINT32_MAX))
1828 return i;
1829 }
1830 return UINT32_MAX;
1831}
1832
1833
1834
1835/**
1836 * Adds a fixup.
1837 *
1838 * @returns Success indicator.
1839 * @param pReNative The native recompile state.
1840 * @param offWhere The instruction offset of the fixup location.
1841 * @param idxLabel The target label ID for the fixup.
1842 * @param enmType The fixup type.
1843 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1844 */
1845DECLHIDDEN(bool) iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1846 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/) RT_NOEXCEPT
1847{
1848 Assert(idxLabel <= UINT16_MAX);
1849 Assert((unsigned)enmType <= UINT8_MAX);
1850
1851 /*
1852 * Make sure we've room.
1853 */
1854 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1855 uint32_t const cFixups = pReNative->cFixups;
1856 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1857 { /* likely */ }
1858 else
1859 {
1860 uint32_t cNew = pReNative->cFixupsAlloc;
1861 AssertReturn(cNew, false);
1862 AssertReturn(cFixups == cNew, false);
1863 cNew *= 2;
1864 AssertReturn(cNew <= _128K, false);
1865 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1866 AssertReturn(paFixups, false);
1867 pReNative->paFixups = paFixups;
1868 pReNative->cFixupsAlloc = cNew;
1869 }
1870
1871 /*
1872 * Add the fixup.
1873 */
1874 paFixups[cFixups].off = offWhere;
1875 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1876 paFixups[cFixups].enmType = enmType;
1877 paFixups[cFixups].offAddend = offAddend;
1878 pReNative->cFixups = cFixups + 1;
1879 return true;
1880}
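
/*
 * Illustrative sketch (not compiled): recording a fixup against a label that
 * is not defined yet.  kIemNativeFixupType_Rel32 and the -4 addend for a jump
 * displacement are assumptions made for the example, not definitions taken
 * from this file.
 *
 *      uint32_t const offFixup = off;   (offset of the 32-bit displacement just emitted)
 *      AssertReturn(iemNativeAddFixup(pReNative, offFixup, idxLabel, kIemNativeFixupType_Rel32, -4),
 *                   UINT32_MAX);
 */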
1881
1882/**
1883 * Slow code path for iemNativeInstrBufEnsure.
1884 */
1885DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1886 uint32_t cInstrReq) RT_NOEXCEPT
1887{
1888 /* Double the buffer size till we meet the request. */
1889 uint32_t cNew = pReNative->cInstrBufAlloc;
1890 AssertReturn(cNew > 0, NULL);
1891 do
1892 cNew *= 2;
1893 while (cNew < off + cInstrReq);
1894
1895 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1896#ifdef RT_ARCH_ARM64
1897 AssertReturn(cbNew <= _1M, NULL); /* Limited by the branch instruction range (18+2 bits). */
1898#else
1899 AssertReturn(cbNew <= _2M, NULL);
1900#endif
1901
1902 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1903 AssertReturn(pvNew, NULL);
1904
1905 pReNative->cInstrBufAlloc = cNew;
1906 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1907}
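
/*
 * Illustrative sketch (not compiled): how an emitter is expected to reserve
 * buffer space before writing instruction words.  The inline wrapper
 * iemNativeInstrBufEnsure() that fronts the slow path above lives in the
 * header; its exact shape as used here is an assumption for the example.
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
 *      AssertReturn(pCodeBuf, UINT32_MAX);
 *      pCodeBuf[off++] = ...encoded instruction...;
 */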
1908
1909#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1910
1911/**
1912 * Grows the static debug info array used during recompilation.
1913 * @returns Pointer to the new debug info block, NULL on failure.
1914 */
1915DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo) RT_NOEXCEPT
1916{
1917 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1918 AssertReturn(cNew < _1M && cNew != 0, NULL);
1919 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1920 AssertReturn(pDbgInfo, NULL);
1921 pReNative->pDbgInfo = pDbgInfo;
1922 pReNative->cDbgInfoAlloc = cNew;
1923 return pDbgInfo;
1924}
1925
1926
1927/**
1928 * Adds a new debug info uninitialized entry, returning the pointer to it.
1929 */
1930DECLINLINE(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1931{
1932 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1933 { /* likely */ }
1934 else
1935 {
1936 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1937 AssertReturn(pDbgInfo, NULL);
1938 }
1939 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1940}
1941
1942
1943/**
1944 * Debug Info: Adds a native offset record, if necessary.
1945 */
1946static bool iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off) RT_NOEXCEPT
1947{
1948 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1949
1950 /*
1951 * Search backwards to see if we've got a similar record already.
1952 */
1953 uint32_t idx = pDbgInfo->cEntries;
1954 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1955 while (idx-- > idxStop)
1956 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1957 {
1958 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1959 return true;
1960 AssertReturn(pDbgInfo->aEntries[idx].NativeOffset.offNative < off, false);
1961 break;
1962 }
1963
1964 /*
1965 * Add it.
1966 */
1967 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1968 AssertReturn(pEntry, false);
1969 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1970 pEntry->NativeOffset.offNative = off;
1971
1972 return true;
1973}
1974
1975
1976/**
1977 * Debug Info: Record info about a label.
1978 */
1979static bool iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData) RT_NOEXCEPT
1980{
1981 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1982 AssertReturn(pEntry, false);
1983
1984 pEntry->Label.uType = kIemTbDbgEntryType_Label;
1985 pEntry->Label.uUnused = 0;
1986 pEntry->Label.enmLabel = (uint8_t)enmType;
1987 pEntry->Label.uData = uData;
1988
1989 return true;
1990}
1991
1992
1993/**
1994 * Debug Info: Record info about a threaded call.
1995 */
1996static bool iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall) RT_NOEXCEPT
1997{
1998 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1999 AssertReturn(pEntry, false);
2000
2001 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2002 pEntry->ThreadedCall.uUnused = 0;
2003 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2004
2005 return true;
2006}
2007
2008
2009/**
2010 * Debug Info: Record info about a new guest instruction.
2011 */
2012static bool iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec) RT_NOEXCEPT
2013{
2014 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2015 AssertReturn(pEntry, false);
2016
2017 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2018 pEntry->GuestInstruction.uUnused = 0;
2019 pEntry->GuestInstruction.fExec = fExec;
2020
2021 return true;
2022}
2023
2024
2025/**
2026 * Debug Info: Record info about guest register shadowing.
2027 */
2028static bool iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2029 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX) RT_NOEXCEPT
2030{
2031 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2032 AssertReturn(pEntry, false);
2033
2034 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2035 pEntry->GuestRegShadowing.uUnused = 0;
2036 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2037 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2038 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2039
2040 return true;
2041}
2042
2043#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2044
2045
2046/*********************************************************************************************************************************
2047* Register Allocator *
2048*********************************************************************************************************************************/
2049
2050/**
2051 * Register parameter indexes (indexed by argument number).
2052 */
2053DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2054{
2055 IEMNATIVE_CALL_ARG0_GREG,
2056 IEMNATIVE_CALL_ARG1_GREG,
2057 IEMNATIVE_CALL_ARG2_GREG,
2058 IEMNATIVE_CALL_ARG3_GREG,
2059#if defined(IEMNATIVE_CALL_ARG4_GREG)
2060 IEMNATIVE_CALL_ARG4_GREG,
2061# if defined(IEMNATIVE_CALL_ARG5_GREG)
2062 IEMNATIVE_CALL_ARG5_GREG,
2063# if defined(IEMNATIVE_CALL_ARG6_GREG)
2064 IEMNATIVE_CALL_ARG6_GREG,
2065# if defined(IEMNATIVE_CALL_ARG7_GREG)
2066 IEMNATIVE_CALL_ARG7_GREG,
2067# endif
2068# endif
2069# endif
2070#endif
2071};
2072
2073/**
2074 * Call register masks indexed by argument count.
2075 */
2076DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2077{
2078 0,
2079 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2080 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2081 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2082 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2083 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2084#if defined(IEMNATIVE_CALL_ARG4_GREG)
2085 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2086 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2087# if defined(IEMNATIVE_CALL_ARG5_GREG)
2088 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2089 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2090# if defined(IEMNATIVE_CALL_ARG6_GREG)
2091 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2092 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2093 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2094# if defined(IEMNATIVE_CALL_ARG7_GREG)
2095 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2096 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2097 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2098# endif
2099# endif
2100# endif
2101#endif
2102};
2103
2104/**
2105 * Info about shadowed guest register values.
2106 * @see IEMNATIVEGSTREG
2107 */
2108static struct
2109{
2110 /** Offset in VMCPU. */
2111 uint32_t off;
2112 /** The field size. */
2113 uint8_t cb;
2114 /** Name (for logging). */
2115 const char *pszName;
2116} const g_aGstShadowInfo[] =
2117{
2118#define CPUMCTX_OFF_AND_SIZE(a_Reg) RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2119 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2120 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2121 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2122 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2123 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2124 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2125 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2126 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2127 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2128 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2129 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2130 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2131 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2132 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2133 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2134 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2135 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2136 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2137 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
2138 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
2139 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
2140 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
2141 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
2142 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
2143 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2144 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2145 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2146 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2147 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2148 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2149 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2150 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2151 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2152 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2153 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2154 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2155 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2156 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2157 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2158 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2159 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2160 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2161#undef CPUMCTX_OFF_AND_SIZE
2162};
2163AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2164
2165
2166/** Host CPU general purpose register names. */
2167const char * const g_apszIemNativeHstRegNames[] =
2168{
2169#ifdef RT_ARCH_AMD64
2170 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2171#elif defined(RT_ARCH_ARM64)
2172 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2173 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2174#else
2175# error "port me"
2176#endif
2177};
2178
2179
2180DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2181 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2182{
2183 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2184
2185 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2186 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2187 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2188 return (uint8_t)idxReg;
2189}
2190
2191
2192/**
2193 * Locate a register, possibly freeing one up.
2194 *
2195 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2196 * failed.
2197 */
2198static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fAllowVolatile) RT_NOEXCEPT
2199{
2200 uint32_t fRegMask = fAllowVolatile
2201 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
2202 : IEMNATIVE_HST_GREG_MASK & ~(IEMNATIVE_REG_FIXED_MASK | IEMNATIVE_CALL_VOLATILE_GREG_MASK);
2203
2204 /*
2205 * Try a freed register that's shadowing a guest register
2206 */
2207 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2208 if (fRegs)
2209 {
2210 /** @todo pick better here: */
2211 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2212
2213 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2214 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2215 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2216 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2217
2218 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2219 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2220 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2221 return idxReg;
2222 }
2223
2224 /*
2225 * Try free up a variable that's in a register.
2226 *
2227     * We do two rounds here: first we evacuate variables that don't need to be
2228     * saved on the stack, then in the second round we move things onto the stack.
2229 */
2230 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2231 {
2232 uint32_t fVars = pReNative->Core.bmVars;
2233 while (fVars)
2234 {
2235 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2236 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2237 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2238 && (RT_BIT_32(idxReg) & fRegMask)
2239 && ( iLoop == 0
2240 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2241 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2242 {
2243 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2244 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2245 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2246 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2247 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2248
2249 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2250 {
2251 AssertReturn(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, UINT8_MAX);
2252 uint32_t off = *poff;
2253 *poff = off = iemNativeEmitStoreGprByBp(pReNative, off,
2254 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2255 - IEMNATIVE_FP_OFF_STACK_VARS,
2256 idxReg);
2257 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2258 }
2259
2260 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2261 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2262 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2263 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2264 return idxReg;
2265 }
2266 fVars &= ~RT_BIT_32(idxVar);
2267 }
2268 }
2269
2270 AssertFailedReturn(UINT8_MAX);
2271}
2272
2273
2274/**
2275 * Moves a variable to a different register or spills it onto the stack.
2276 *
2277 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2278 * kinds can easily be recreated if needed later.
2279 *
2280 * @returns The new code buffer position, UINT32_MAX on failure.
2281 * @param pReNative The native recompile state.
2282 * @param off The current code buffer position.
2283 * @param idxVar The variable index.
2284 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2285 * call-volatile registers.
2286 */
2287static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2288 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2289{
2290 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2291 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2292
2293 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2294 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2295 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2296 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2297 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2298 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2299 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2300 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2301
2302
2303 /** @todo Add statistics on this.*/
2304 /** @todo Implement basic variable liveness analysis (python) so variables
2305 * can be freed immediately once no longer used. This has the potential to
2306 * be trashing registers and stack for dead variables. */
2307
2308 /*
2309 * First try move it to a different register, as that's cheaper.
2310 */
2311 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2312 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2313 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2314 if (fRegs)
2315 {
2316 /* Avoid using shadow registers, if possible. */
2317 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2318 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2319 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2320
2321 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2322 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2323 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2324 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2325 if (fGstRegShadows)
2326 {
2327 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2328 while (fGstRegShadows)
2329 {
2330 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows);
2331 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2332
2333 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2334 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2335 }
2336 }
2337
2338 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2339 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2340 }
2341 /*
2342 * Otherwise we must spill the register onto the stack.
2343 */
2344 else
2345 {
2346 AssertReturn(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, UINT32_MAX);
2347 off = iemNativeEmitStoreGprByBp(pReNative, off,
2348 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2349 - IEMNATIVE_FP_OFF_STACK_VARS,
2350 idxRegOld);
2351 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2352
2353 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2354 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2355 }
2356
2357 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2358 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2359 return off;
2360}
2361
2362
2363/**
2364 * Allocates a temporary host general purpose register.
2365 *
2366 * This may emit code to save register content onto the stack in order to free
2367 * up a register.
2368 *
2369 * @returns The host register number, UINT8_MAX on failure.
2370 * @param pReNative The native recompile state.
2371 * @param poff Pointer to the variable with the code buffer position.
2372 *                          This will be updated if we need to move a variable from
2373 * register to stack in order to satisfy the request.
2374 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
2375 * registers (@c true, default) or the other way around
2376 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2377 */
2378DECLHIDDEN(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2379 bool fPreferVolatile /*= true*/) RT_NOEXCEPT
2380{
2381 /*
2382 * Try find a completely unused register, preferably a call-volatile one.
2383 */
2384 uint8_t idxReg;
2385 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2386 & ~pReNative->Core.bmHstRegsWithGstShadow
2387 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2388 if (fRegs)
2389 {
2390 if (fPreferVolatile)
2391 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2392 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2393 else
2394 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2395 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2396 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2397 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2398 }
2399 else
2400 {
2401 idxReg = iemNativeRegAllocFindFree(pReNative, poff, true /*fAllowVolatile*/);
2402 AssertReturn(idxReg != UINT8_MAX, UINT8_MAX);
2403 }
2404 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2405}
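
/*
 * Illustrative sketch (not compiled): the usual bracket around a scratch
 * register.  The emitter step in the middle stands in for whatever code the
 * caller wants to generate with the register.
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      AssertReturn(idxRegTmp != UINT8_MAX, UINT32_MAX);
 *      ... emit code using idxRegTmp ...
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */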
2406
2407
2408/**
2409 * Allocates a temporary register for loading an immediate value into.
2410 *
2411 * This will emit code to load the immediate, unless there happens to be an
2412 * unused register with the value already loaded.
2413 *
2414 * The caller will not modify the returned register; it must be considered
2415 * read-only. Free using iemNativeRegFreeTmpImm.
2416 *
2417 * @returns The host register number, UINT8_MAX on failure.
2418 * @param pReNative The native recompile state.
2419 * @param poff Pointer to the variable with the code buffer position.
2420 * @param uImm The immediate value that the register must hold upon
2421 * return.
2422 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
2423 * registers (@c true, default) or the other way around
2424 * (@c false).
2425 *
2426 * @note Reusing immediate values has not been implemented yet.
2427 */
2428DECLHIDDEN(uint8_t) iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm,
2429 bool fPreferVolatile /*= true*/) RT_NOEXCEPT
2430{
2431 uint8_t idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2432 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
2433 {
2434 uint32_t off = *poff;
2435 *poff = off = iemNativeEmitLoadGprImm64(pReNative, off, idxReg, uImm);
2436 AssertReturnStmt(off != UINT32_MAX, iemNativeRegFreeTmp(pReNative, idxReg), UINT8_MAX);
2437 }
2438 return idxReg;
2439}
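
/*
 * Illustrative sketch (not compiled): loading a constant into a read-only
 * scratch register and releasing it via the matching free function further
 * down.  The constant is an arbitrary example value.
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeef));
 *      AssertReturn(idxRegImm != UINT8_MAX, UINT32_MAX);
 *      ... emit code reading idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */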
2440
2441
2442/**
2443 * Marks host register @a idxHstReg as containing a shadow copy of guest
2444 * register @a enmGstReg.
2445 *
2446 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2447 * host register before calling.
2448 */
2449DECL_FORCE_INLINE(void)
2450iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2451{
2452 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2453
2454 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2455 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2456 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2457 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2458#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2459 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2460 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2461#else
2462 RT_NOREF(off);
2463#endif
2464}
2465
2466
2467/**
2468 * Clear any guest register shadow claims from @a idxHstReg.
2469 *
2470 * The register does not need to be shadowing any guest registers.
2471 */
2472DECL_FORCE_INLINE(void)
2473iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2474{
2475 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2476 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2477 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2478 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2479
2480#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2481 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2482 if (fGstRegs)
2483 {
2484 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2485 while (fGstRegs)
2486 {
2487 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2488 fGstRegs &= ~RT_BIT_64(iGstReg);
2489 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2490 }
2491 }
2492#else
2493 RT_NOREF(off);
2494#endif
2495
2496 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2497 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2498 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2499}
2500
2501
2502/**
2503 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2504 * to @a idxRegTo.
2505 */
2506DECL_FORCE_INLINE(void)
2507iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2508 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2509{
2510 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2511 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2512 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2513 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2514 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2515
2516 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2517 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2518 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2519#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2520 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2521 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2522#else
2523 RT_NOREF(off);
2524#endif
2525}
2526
2527
2528
2529/**
2530 * Intended use statement for iemNativeRegAllocTmpForGuestReg().
2531 */
2532typedef enum IEMNATIVEGSTREGUSE
2533{
2534 /** The usage is read-only, the register holding the guest register
2535 * shadow copy will not be modified by the caller. */
2536 kIemNativeGstRegUse_ReadOnly = 0,
2537 /** The caller will update the guest register (think: PC += cbInstr).
2538 * The guest shadow copy will follow the returned register. */
2539 kIemNativeGstRegUse_ForUpdate,
2540 /** The caller will use the guest register value as input in a calculation
2541 * and the host register will be modified.
2542 * This means that the returned host register will not be marked as a shadow
2543 * copy of the guest register. */
2544 kIemNativeGstRegUse_Calculation
2545} IEMNATIVEGSTREGUSE;
2546
2547/**
2548 * Allocates a temporary host general purpose register for updating a guest
2549 * register value.
2550 *
2551 * Since we may already have a register holding the guest register value,
2552 * code will be emitted to do the loading if that's not the case. Code may also
2553 * be emitted if we have to free up a register to satisfy the request.
2554 *
2555 * @returns The host register number, UINT8_MAX on failure.
2556 * @param pReNative The native recompile state.
2557 * @param poff Pointer to the variable with the code buffer
2558 *                          position. This will be updated if we need to move a
2559 * variable from register to stack in order to satisfy
2560 * the request.
2561 * @param   enmGstReg       The guest register that is to be updated.
2562 * @param enmIntendedUse How the caller will be using the host register.
2563 */
2564DECLHIDDEN(uint8_t) iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2565 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse) RT_NOEXCEPT
2566{
2567 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2568#ifdef LOG_ENABLED
2569 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2570#endif
2571
2572 /*
2573 * First check if the guest register value is already in a host register.
2574 */
2575 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2576 {
2577 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2578 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2579 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2580 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2581
2582 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2583 {
2584 /*
2585 * If the register will trash the guest shadow copy, try find a
2586 * completely unused register we can use instead. If that fails,
2587 * we need to disassociate the host reg from the guest reg.
2588 */
2589 /** @todo would be nice to know if preserving the register is in any way helpful. */
2590 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2591 && ( ~pReNative->Core.bmHstRegs
2592 & ~pReNative->Core.bmHstRegsWithGstShadow
2593 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2594 {
2595 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2596 Assert(idxRegNew < RT_ELEMENTS(pReNative->Core.aHstRegs));
2597
2598 uint32_t off = *poff;
2599 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2600 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2601
2602 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2603 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2604 g_apszIemNativeHstRegNames[idxRegNew]));
2605 idxReg = idxRegNew;
2606 }
2607 else
2608 {
2609 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2610 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2611 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2612 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2613 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2614 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2615 else
2616 {
2617 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2618 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2619 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2620 }
2621 }
2622 }
2623 else
2624 {
2625 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2626 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2627
2628 /*
2629 * Allocate a new register, copy the value and, if updating, the
2630 * guest shadow copy assignment to the new register.
2631 */
2632 /** @todo share register for readonly access. */
2633 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2634 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->Core.aHstRegs), UINT8_MAX);
2635
2636 uint32_t off = *poff;
2637 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2638 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2639
2640 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2641 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2642 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2643 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2644 else
2645 {
2646 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2647 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2648 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2649 g_apszIemNativeHstRegNames[idxRegNew]));
2650 }
2651 idxReg = idxRegNew;
2652 }
2653
2654#ifdef VBOX_STRICT
2655 /* Strict builds: Check that the value is correct. */
2656 uint32_t off = *poff;
2657 *poff = off = iemNativeEmitGuestRegValueCheck(pReNative, off, idxReg, enmGstReg);
2658 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2659#endif
2660
2661 return idxReg;
2662 }
2663
2664 /*
2665     * Allocate a new register, load it with the guest value and designate it as a
2666     * copy of the guest register (unless it's for a destructive calculation).
2666 */
2667 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2668 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->Core.aHstRegs), UINT8_MAX);
2669
2670 uint32_t off = *poff;
2671 *poff = off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegNew, enmGstReg);
2672 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2673
2674 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2675 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, off);
2676 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2677 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2678
2679 return idxRegNew;
2680}
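
/*
 * Illustrative sketch (not compiled): advancing the guest PC through its
 * shadow copy.  kIemNativeGstReg_Pc comes from the table above; the add and
 * store-back steps are placeholders, not emitters declared in this file.
 *
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *      AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
 *      ... emit 'add idxPcReg, cbInstr' and a store back to the CPUMCTX rip field ...
 *      iemNativeRegFreeTmp(pReNative, idxPcReg);
 */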
2681
2682
2683DECLHIDDEN(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar) RT_NOEXCEPT;
2684
2685
2686/**
2687 * Allocates argument registers for a function call.
2688 *
2689 * @returns Success indicator.
2690 * @param pReNative The native recompile state.
2691 * @param off The current code buffer offset.
2692 * @param cArgs The number of arguments the function call takes.
2693 */
2694DECLHIDDEN(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs) RT_NOEXCEPT
2695{
2696 AssertReturn(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT, false);
2697 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2698 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2699
2700 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2701 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2702 else if (cArgs == 0)
2703 return true;
2704
2705 /*
2706     * Are we lucky and all the registers are free and not shadowing anything?
2707 */
2708 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2709 for (uint32_t i = 0; i < cArgs; i++)
2710 {
2711 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2712 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2713 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2714 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2715 }
2716 /*
2717 * Okay, not lucky so we have to free up the registers.
2718 */
2719 else
2720 for (uint32_t i = 0; i < cArgs; i++)
2721 {
2722 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2723 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2724 {
2725 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2726 {
2727 case kIemNativeWhat_Var:
2728 {
2729 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2730 AssertReturn(idxVar < RT_ELEMENTS(pReNative->Core.aVars), false);
2731 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2732 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2733
2734 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2735 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2736 else
2737 {
2738 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2739 AssertReturn(off != UINT32_MAX, false);
2740 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2741 }
2742 break;
2743 }
2744
2745 case kIemNativeWhat_Tmp:
2746 case kIemNativeWhat_Arg:
2747 case kIemNativeWhat_rc:
2748 AssertFailedReturn(false);
2749 default:
2750 AssertFailedReturn(false);
2751 }
2752
2753 }
2754 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2755 {
2756 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2757 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2758 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2759 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2760 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2761 }
2762 else
2763 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2764 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2765 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2766 }
2767 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2768 return true;
2769}
2770
2771
2772DECLHIDDEN(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
2773
2774
2775#if 0
2776/**
2777 * Frees a register assignment of any type.
2778 *
2779 * @param pReNative The native recompile state.
2780 * @param idxHstReg The register to free.
2781 *
2782 * @note Does not update variables.
2783 */
2784DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2785{
2786 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2787 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2788 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2789 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2790 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2791 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2792 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2793 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2794 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2795 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2796 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2797 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2798 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2799 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2800
2801 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2802 /* no flushing, right:
2803 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2804 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2805 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2806 */
2807}
2808#endif
2809
2810
2811/**
2812 * Frees a temporary register.
2813 *
2814 * Any shadow copies of guest registers assigned to the host register will not
2815 * be flushed by this operation.
2816 */
2817DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2818{
2819 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2820 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2821 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2822 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2823 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2824}
2825
2826
2827/**
2828 * Frees a temporary immediate register.
2829 *
2830 * It is assumed that the caller has not modified the register, so it still holds
2831 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2832 */
2833DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2834{
2835 iemNativeRegFreeTmp(pReNative, idxHstReg);
2836}
2837
2838
2839/**
2840 * Called right before emitting a call instruction to move anything important
2841 * out of call-volatile registers, free and flush the call-volatile registers,
2842 * optionally freeing argument variables.
2843 *
2844 * @returns New code buffer offset, UINT32_MAX on failure.
2845 * @param pReNative The native recompile state.
2846 * @param off The code buffer offset.
2847 * @param cArgs The number of arguments the function call takes.
2848 *                          It is presumed that the host register part of these has
2849 * been allocated as such already and won't need moving,
2850 * just freeing.
2851 * @param fFreeArgVars Whether to free argument variables for the call.
2852 */
2853DECLHIDDEN(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2854 uint8_t cArgs, bool fFreeArgVars) RT_NOEXCEPT
2855{
2856 /*
2857 * Free argument variables first (simplified).
2858 */
2859 AssertReturn(cArgs <= RT_ELEMENTS(pReNative->Core.aidxArgVars), UINT32_MAX);
2860 if (fFreeArgVars && cArgs > 0)
2861 {
2862 for (uint32_t i = 0; i < cArgs; i++)
2863 {
2864 uint8_t idxVar = pReNative->Core.aidxArgVars[i];
2865 if (idxVar < RT_ELEMENTS(pReNative->Core.aVars))
2866 {
2867 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
2868 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2869 Assert( pReNative->Core.aVars[idxVar].idxReg
2870 == (i < RT_ELEMENTS(g_aidxIemNativeCallRegs) ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
2871 }
2872 }
2873 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2874 }
2875
2876 /*
2877 * Move anything important out of volatile registers.
2878 */
2879 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2880 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2881 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2882#ifdef IEMNATIVE_REG_FIXED_TMP0
2883 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2884#endif
2885 & ~g_afIemNativeCallRegs[cArgs];
2886
2887 fRegsToMove &= pReNative->Core.bmHstRegs;
2888 if (!fRegsToMove)
2889 { /* likely */ }
2890 else
2891 while (fRegsToMove != 0)
2892 {
2893 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2894 fRegsToMove &= ~RT_BIT_32(idxReg);
2895
2896 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2897 {
2898 case kIemNativeWhat_Var:
2899 {
2900 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2901 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2902 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2903 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2904 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2905 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2906 else
2907 {
2908 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2909 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2910 }
2911 continue;
2912 }
2913
2914 case kIemNativeWhat_Arg:
2915 AssertMsgFailed(("What?!?: %u\n", idxReg));
2916 continue;
2917
2918 case kIemNativeWhat_rc:
2919 case kIemNativeWhat_Tmp:
2920 AssertMsgFailed(("Missing free: %u\n", idxReg));
2921 continue;
2922
2923 case kIemNativeWhat_FixedTmp:
2924 case kIemNativeWhat_pVCpuFixed:
2925 case kIemNativeWhat_pCtxFixed:
2926 case kIemNativeWhat_FixedReserved:
2927 case kIemNativeWhat_Invalid:
2928 case kIemNativeWhat_End:
2929 AssertFailedReturn(UINT32_MAX);
2930 }
2931 AssertFailedReturn(UINT32_MAX);
2932 }
2933
2934 /*
2935 * Do the actual freeing.
2936 */
2937 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2938
2939 /* If there are guest register shadows in any call-volatile register, we
2940 have to clear the corresponding guest register masks for each register. */
2941 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2942 if (fHstRegsWithGstShadow)
2943 {
2944 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
2945 do
2946 {
2947 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
2948            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2949
2950 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2951 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2952 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2953 } while (fHstRegsWithGstShadow != 0);
2954 }
2955
2956 return off;
2957}
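/* Sketch of the typical call-site sequence for this helper, mirroring what
   iemNativeEmitCImplCall and iemNativeEmitThreadedCall below do (pfnWorker,
   cArgs and idxInstr being placeholders):

        iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
        off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs, false);
        ... load the IEMNATIVE_CALL_ARGx_GREG registers ...
        off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnWorker);
        off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);

   i.e. shadows are flushed first, the call-volatile registers are vacated here,
   and only then are the argument registers loaded. */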
2958
2959
2960/**
2961 * Flushes a set of guest register shadow copies.
2962 *
2963 * This is usually done after calling a threaded function or a C-implementation
2964 * of an instruction.
2965 *
2966 * @param pReNative The native recompile state.
2967 * @param fGstRegs Set of guest registers to flush.
2968 */
2969DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
2970{
2971 /*
2972 * Reduce the mask by what's currently shadowed
2973 */
2974 fGstRegs &= pReNative->Core.bmGstRegShadows;
2975 if (fGstRegs)
2976 {
2977 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
2978 if (pReNative->Core.bmGstRegShadows)
2979 {
2980 /*
2981 * Partial.
2982 */
2983 do
2984 {
2985 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2986 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2987 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
2988 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2989 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
2990
2991 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
2992 fGstRegs &= ~fInThisHstReg;
2993 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= fInThisHstReg;
2994 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2995 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2996 } while (fGstRegs != 0);
2997 }
2998 else
2999 {
3000 /*
3001 * Clear all.
3002 */
3003 do
3004 {
3005 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3006 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3007 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3008 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3009 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3010
3011 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3012 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3013 } while (fGstRegs != 0);
3014 pReNative->Core.bmHstRegsWithGstShadow = 0;
3015 }
3016 }
3017}
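/* Note: the "partial" path above walks only the requested guest registers and
   prunes them from the shadow masks of their hosting registers, whereas the
   "clear all" path can simply zero each affected aHstRegs[].fGstRegShadows and
   wipe bmHstRegsWithGstShadow in one go since no shadows remain afterwards. */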
3018
3019
3020/**
3021 * Flushes any delayed guest register writes.
3022 *
3023 * This must be called prior to calling CImpl functions and any helpers that use
3024 * the guest state (like raising exceptions) and such.
3025 *
3026 * The delayed-write optimization has not been implemented yet, so this is
3027 * currently a no-op. The first target would be RIP updates, since these are the most common ones.
3028 */
3029DECLHIDDEN(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off) RT_NOEXCEPT
3030{
3031 RT_NOREF(pReNative, off);
3032 return off;
3033}
3034
3035
3036/*********************************************************************************************************************************
3037* Code Emitters (larger snippets) *
3038*********************************************************************************************************************************/
3039
3040/**
3041 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3042 * extending to 64-bit width.
3043 *
3044 * @returns New code buffer offset on success, UINT32_MAX on failure.
3045 * @param pReNative The native recompile state.
3046 * @param off The current code buffer position.
3047 * @param idxHstReg The host register to load the guest register value into.
3048 * @param enmGstReg The guest register to load.
3049 *
3050 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3051 * that is something the caller needs to do if applicable.
3052 */
3053DECLHIDDEN(uint32_t) iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3054 uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg) RT_NOEXCEPT
3055{
3056 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3057 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3058
3059 switch (g_aGstShadowInfo[enmGstReg].cb)
3060 {
3061 case sizeof(uint64_t):
3062 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3063 case sizeof(uint32_t):
3064 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3065 case sizeof(uint16_t):
3066 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3067#if 0 /* not present in the table. */
3068 case sizeof(uint8_t):
3069 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3070#endif
3071 default:
3072 AssertFailedReturn(UINT32_MAX);
3073 }
3074}
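/* Example (illustrative): loading the 64-bit RAX shadow resolves to a single
   iemNativeEmitLoadGprFromVCpuU64 call, i.e. on AMD64 roughly
        mov   idxHstReg, [pVCpu + g_aGstShadowInfo[enmGstReg].off]
   addressed via the fixed pVCpu register, while 16-bit entries (e.g. segment
   selector values) use the U16 loader and end up zero extended to the full
   64-bit host register as documented above. */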
3075
3076
3077#ifdef VBOX_STRICT
3078/**
3079 * Emits code that checks that the content of register @a idxReg is the same
3080 * as what's in the guest register @a enmGstReg, emitting a breakpoint
3081 * instruction if that's not the case.
3082 *
3083 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3084 * Trashes EFLAGS on AMD64.
3085 */
3086static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3087 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg) RT_NOEXCEPT
3088{
3089# ifdef RT_ARCH_AMD64
3090 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3091 AssertReturn(pbCodeBuf, UINT32_MAX);
3092
3093 /* cmp reg, [mem] */
3094 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3095 {
3096 if (idxReg >= 8)
3097 pbCodeBuf[off++] = X86_OP_REX_R;
3098 pbCodeBuf[off++] = 0x38;
3099 }
3100 else
3101 {
3102 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3103 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3104 else
3105 {
3106 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3107 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3108 else
3109 AssertReturn(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t), UINT32_MAX);
3110 if (idxReg >= 8)
3111 pbCodeBuf[off++] = X86_OP_REX_R;
3112 }
3113 pbCodeBuf[off++] = 0x39;
3114 }
3115 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3116
3117 /* je/jz +1 */
3118 pbCodeBuf[off++] = 0x74;
3119 pbCodeBuf[off++] = 0x01;
3120
3121 /* int3 */
3122 pbCodeBuf[off++] = 0xcc;
3123
3124 /* For values smaller than the register size, we must check that the rest
3125 of the register is all zeros. */
3126 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3127 {
3128 /* test reg64, imm32 */
3129 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3130 pbCodeBuf[off++] = 0xf7;
3131 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3132 pbCodeBuf[off++] = 0;
3133 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3134 pbCodeBuf[off++] = 0xff;
3135 pbCodeBuf[off++] = 0xff;
3136
3137 /* je/jz +1 */
3138 pbCodeBuf[off++] = 0x74;
3139 pbCodeBuf[off++] = 0x01;
3140
3141 /* int3 */
3142 pbCodeBuf[off++] = 0xcc;
3143 }
3144 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3145 {
3146 /* rol reg64, 32 */
3147 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3148 pbCodeBuf[off++] = 0xc1;
3149 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3150 pbCodeBuf[off++] = 32;
3151
3152 /* test reg32, ffffffffh */
3153 if (idxReg >= 8)
3154 pbCodeBuf[off++] = X86_OP_REX_B;
3155 pbCodeBuf[off++] = 0xf7;
3156 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3157 pbCodeBuf[off++] = 0xff;
3158 pbCodeBuf[off++] = 0xff;
3159 pbCodeBuf[off++] = 0xff;
3160 pbCodeBuf[off++] = 0xff;
3161
3162 /* je/jz +1 */
3163 pbCodeBuf[off++] = 0x74;
3164 pbCodeBuf[off++] = 0x01;
3165
3166 /* int3 */
3167 pbCodeBuf[off++] = 0xcc;
3168
3169 /* rol reg64, 32 */
3170 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3171 pbCodeBuf[off++] = 0xc1;
3172 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3173 pbCodeBuf[off++] = 32;
3174 }
3175
3176# elif defined(RT_ARCH_ARM64)
3177 /* mov TMP0, [gstreg] */
3178 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3179
3180 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3181 AssertReturn(pu32CodeBuf, UINT32_MAX);
3182 /* sub tmp0, tmp0, idxReg */
3183 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3184 /* cbz tmp0, +1 */
3185 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, IEMNATIVE_REG_FIXED_TMP0);
3186 /* brk #0x1000+enmGstReg */
3187 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3188
3189# else
3190# error "Port me!"
3191# endif
3192 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3193 return off;
3194}
3195#endif /* VBOX_STRICT */
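/* For reference, the strict-build check above boils down to this AMD64 shape
   (sketch; the exact encoding depends on the register index and value width):
        cmp   idxReg, [pVCpu + offCpumCtx]    ; compare against the CPUMCTX copy
        je    +1
        int3                                  ; mismatch -> breakpoint
        test  idxReg, <mask of unused upper bits>
        je    +1
        int3                                  ; upper bits not zero -> breakpoint
   On ARM64 it is a sub/cbz/brk triplet using IEMNATIVE_REG_FIXED_TMP0 instead. */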
3196
3197
3198
3199/**
3200 * Emits code for checking the return code of a call and rcPassUp, returning
3201 * from the generated code if either is non-zero.
3202 */
3203DECLHIDDEN(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3204 uint8_t idxInstr) RT_NOEXCEPT
3205{
3206#ifdef RT_ARCH_AMD64
3207 /*
3208 * AMD64: eax = call status code.
3209 */
3210
3211 /* edx = rcPassUp */
3212 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3213# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3214 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3215# endif
3216
3217 /* edx = eax | rcPassUp */
3218 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3219 AssertReturn(pbCodeBuf, UINT32_MAX);
3220 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3221 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3222 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3223
3224 /* Jump to non-zero status return path. */
3225 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3226
3227 /* done. */
3228
3229#elif RT_ARCH_ARM64
3230 /*
3231 * ARM64: w0 = call status code.
3232 */
3233 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3234 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3235
3236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3237 AssertReturn(pu32CodeBuf, UINT32_MAX);
3238
3239 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3240
3241 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3242 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
3243 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5), UINT32_MAX);
3244    pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0 /*imm19, set by the fixup above*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
3245
3246#else
3247# error "port me"
3248#endif
3249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3250 return off;
3251}
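/* Sketch of the emitted AMD64 sequence (the ARM64 variant is structurally the
   same, using w0/w3/w4 and a cbnz with a label fixup):
        mov   edx, [pVCpu + offsetof(VMCPUCC, iem.s.rcPassUp)]
        or    edx, eax                        ; eax = call status code
        jnz   NonZeroRetOrPassUp              ; either non-zero -> RC fiddling path
   The NonZeroRetOrPassUp label is resolved later by iemNativeEmitRcFiddling(). */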
3252
3253
3254/**
3255 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3256 * raising a \#GP(0) if it isn't.
3257 *
3258 * @returns New code buffer offset, UINT32_MAX on failure.
3259 * @param pReNative The native recompile state.
3260 * @param off The code buffer offset.
3261 * @param idxAddrReg The host register with the address to check.
3262 * @param idxInstr The current instruction.
3263 */
3264DECLHIDDEN(uint32_t) iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3265 uint8_t idxAddrReg, uint8_t idxInstr)
3266{
3267 RT_NOREF(idxInstr);
3268
3269 /*
3270 * Make sure we don't have any outstanding guest register writes as we may
3271 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3272 */
3273 off = iemNativeRegFlushPendingWrites(pReNative, off);
3274
3275#ifdef RT_ARCH_AMD64
3276 /*
3277 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3278 * return raisexcpt();
3279 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3280 */
3281 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3282 AssertReturn(iTmpReg < RT_ELEMENTS(pReNative->Core.aHstRegs), UINT32_MAX);
3283
3284 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3285 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3286 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3287 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3288
3289# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3290 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3291# else
3292 uint32_t const offFixup = off;
3293 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3294 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3295 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3296 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3297# endif
3298
3299 iemNativeRegFreeTmp(pReNative, iTmpReg);
3300
3301#elif defined(RT_ARCH_ARM64)
3302 /*
3303 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3304 * return raisexcpt();
3305 * ----
3306 * mov x1, 0x800000000000
3307 * add x1, x0, x1
3308 * cmp xzr, x1, lsr 48
3309 * and either:
3310 * b.ne .Lraisexcpt
3311 * or:
3312 * b.eq .Lnoexcept
3313 * movz x1, #instruction-number
3314 * b .Lraisexcpt
3315 * .Lnoexcept:
3316 */
3317 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3318 AssertReturn(iTmpReg < RT_ELEMENTS(pReNative->Core.aHstRegs), UINT32_MAX);
3319
3320 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3321 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3322    off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3323
3324# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3325 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3326# else
3327 uint32_t const offFixup = off;
3328 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3329 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3330 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3331 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3332# endif
3333
3334 iemNativeRegFreeTmp(pReNative, iTmpReg);
3335
3336#else
3337# error "Port me"
3338#endif
3339 return off;
3340}
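/* Worked example for the AMD64 variant above (the 32-bit add truncates, which
   is exactly what makes the negative canonical range pass):
        0x00007fffffffffff -> hi32=0x00007fff, +0x8000=0x0000ffff, >>16 = 0      (canonical)
        0x0000800000000000 -> hi32=0x00008000, +0x8000=0x00010000, >>16 = 1      (#GP(0))
        0xffff800000000000 -> hi32=0xffff8000, +0x8000=0x00000000, >>16 = 0      (canonical)
        0xffff7fffffffffff -> hi32=0xffff7fff, +0x8000=0xffffffff, >>16 = 0xffff (#GP(0)) */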
3341
3342
3343/**
3344 * Emits code to check if the content of @a idxAddrReg is within the limit of
3345 * idxSegReg, raising a \#GP(0) if it isn't.
3346 *
3347 * @returns New code buffer offset, UINT32_MAX on failure.
3348 * @param pReNative The native recompile state.
3349 * @param off The code buffer offset.
3350 * @param idxAddrReg The host register (32-bit) with the address to
3351 * check.
3352 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3353 * against.
3354 * @param idxInstr The current instruction.
3355 */
3356DECLHIDDEN(uint32_t) iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3357 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3358{
3359 /*
3360 * Make sure we don't have any outstanding guest register writes as we may
3361 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3362 */
3363 off = iemNativeRegFlushPendingWrites(pReNative, off);
3364
3365 /** @todo implement expand down/whatnot checking */
3366 AssertReturn(idxSegReg == X86_SREG_CS, UINT32_MAX);
3367
3368 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3369 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3370 kIemNativeGstRegUse_ForUpdate);
3371 AssertReturn(iTmpLimReg < RT_ELEMENTS(pReNative->Core.aHstRegs), UINT32_MAX);
3372
3373 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3374
3375#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3376 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3377 RT_NOREF(idxInstr);
3378#else
3379 uint32_t const offFixup = off;
3380 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3381 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3382 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3383 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3384#endif
3385
3386 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3387 return off;
3388}
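/* Note: the check above is an unsigned compare of the 32-bit address against
   the segment limit shadow register; the jump-above condition means #GP(0) is
   only raised when idxAddrReg > limit, i.e. the limit itself is still
   accessible, matching the inclusive limit semantics of non-expand-down
   segments (expand-down handling is still a todo above). */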
3389
3390
3391/**
3392 * Emits a call to a CImpl function or something similar.
3393 */
3394static uint32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3395 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3396 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3397{
3398 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3399 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
3400
3401 /*
3402 * Load the parameters.
3403 */
3404#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3405    /* Special-case the hidden VBOXSTRICTRC return pointer. */
3406 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3407 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3408 if (cAddParams > 0)
3409 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3410 if (cAddParams > 1)
3411 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3412 if (cAddParams > 2)
3413 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3414 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3415
3416#else
3417 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3418 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3419 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3420 if (cAddParams > 0)
3421 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3422 if (cAddParams > 1)
3423 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3424 if (cAddParams > 2)
3425# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3426 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3427# else
3428 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3429# endif
3430#endif
3431 AssertReturn(off != UINT32_MAX, off);
3432
3433 /*
3434 * Make the call.
3435 */
3436 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3437
3438#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3439 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3440#endif
3441
3442 /*
3443 * Check the status code.
3444 */
3445 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3446}
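/* The worker invoked above follows the usual CImpl calling shape, roughly
        VBOXSTRICTRC pfnCImpl(PVMCPUCC pVCpu, uint8_t cbInstr, <up to 3 extra params>);
   (a sketch; the extra parameters are instruction specific and are passed here
   as the raw uParam0..uParam2 values), which is why pVCpu and cbInstr always
   occupy the first two argument registers and the rest follow in call order. */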
3447
3448
3449/**
3450 * Emits a call to a threaded worker function.
3451 */
3452static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3453{
3454 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3455 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
3456 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3457
3458#ifdef RT_ARCH_AMD64
3459 /* Load the parameters and emit the call. */
3460# ifdef RT_OS_WINDOWS
3461# ifndef VBOXSTRICTRC_STRICT_ENABLED
3462 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3463 if (cParams > 0)
3464 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3465 if (cParams > 1)
3466 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3467 if (cParams > 2)
3468 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3469# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3470 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3471 if (cParams > 0)
3472 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3473 if (cParams > 1)
3474 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3475 if (cParams > 2)
3476 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3477 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3478 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3479# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3480# else
3481 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3482 if (cParams > 0)
3483 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3484 if (cParams > 1)
3485 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3486 if (cParams > 2)
3487 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3488# endif
3489
3490 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3491
3492# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3493 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3494# endif
3495
3496#elif RT_ARCH_ARM64
3497 /*
3498 * ARM64:
3499 */
3500 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3501 if (cParams > 0)
3502 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3503 if (cParams > 1)
3504 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3505 if (cParams > 2)
3506 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3507
3508 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3509
3510#else
3511# error "port me"
3512#endif
3513
3514 /*
3515 * Check the status code.
3516 */
3517 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3518 AssertReturn(off != UINT32_MAX, off);
3519
3520 return off;
3521}
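/* Illustrative shape of a generated threaded call on a SysV AMD64 host (the
   Windows and ARM64 variants differ only in register assignment and the
   optional hidden VBOXSTRICTRC pointer):
        mov   rdi, rbx                        ; pVCpu (IEMNATIVE_REG_FIXED_PVMCPU)
        mov   rsi, <auParams[0]>
        mov   rdx, <auParams[1]>
        mov   rcx, <auParams[2]>
        call  g_apfnIemThreadedFunctions[enmFunction]
        ... status / rcPassUp check as emitted by iemNativeEmitCheckCallRetAndPassUp ... */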
3522
3523
3524/**
3525 * Emits the code at the RaiseGP0 label.
3526 */
3527static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3528{
3529 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3530 if (idxLabel != UINT32_MAX)
3531 {
3532 iemNativeLabelDefine(pReNative, idxLabel, off);
3533
3534 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3535 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3536#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3537 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3538#endif
3539 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3540
3541 /* jump back to the return sequence. */
3542 off = iemNativeEmitJmpToLabel(pReNative, off, iemNativeLabelFind(pReNative, kIemNativeLabelType_Return));
3543 }
3544 return off;
3545}
3546
3547
3548/**
3549 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3550 */
3551static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3552{
3553 /*
3554 * Generate the rc + rcPassUp fiddling code if needed.
3555 */
3556 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3557 if (idxLabel != UINT32_MAX)
3558 {
3559 iemNativeLabelDefine(pReNative, idxLabel, off);
3560
3561 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3562#ifdef RT_ARCH_AMD64
3563# ifdef RT_OS_WINDOWS
3564# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3565 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3566# endif
3567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3568 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3569# else
3570 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3571 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3572# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3573 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3574# endif
3575# endif
3576# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3577 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3578# endif
3579
3580#else
3581 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3582 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3583 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3584#endif
3585
3586 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3587 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3588 }
3589 return off;
3590}
3591
3592
3593/**
3594 * Emits a standard epilog.
3595 */
3596static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3597{
3598 /*
3599 * Successful return, so clear the return register (eax, w0).
3600 */
3601    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3602 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3603
3604 /*
3605 * Define label for common return point.
3606 */
3607 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3608 AssertReturn(idxReturn != UINT32_MAX, UINT32_MAX);
3609
3610 /*
3611 * Restore registers and return.
3612 */
3613#ifdef RT_ARCH_AMD64
3614 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3615 AssertReturn(pbCodeBuf, UINT32_MAX);
3616
3617 /* Reposition esp at the r15 restore point. */
3618 pbCodeBuf[off++] = X86_OP_REX_W;
3619 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3620 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3621 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3622
3623 /* Pop non-volatile registers and return */
3624 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3625 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3626 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3627 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3628 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3629 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3630 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3631 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3632# ifdef RT_OS_WINDOWS
3633 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3634 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3635# endif
3636 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3637 pbCodeBuf[off++] = 0xc9; /* leave */
3638 pbCodeBuf[off++] = 0xc3; /* ret */
3639 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3640
3641#elif RT_ARCH_ARM64
3642 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3643 AssertReturn(pu32CodeBuf, UINT32_MAX);
3644
3645    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
3646 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3647 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3648 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3649 IEMNATIVE_FRAME_VAR_SIZE / 8);
3650 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3651 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3652 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3653 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3654 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3655 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3656 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3657 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3658 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3659 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3660 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3661 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3662
3663 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3664 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3665 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3666 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3667
3668 /* retab / ret */
3669# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3670 if (1)
3671 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3672 else
3673# endif
3674 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3675
3676#else
3677# error "port me"
3678#endif
3679 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3680
3681 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3682}
3683
3684
3685/**
3686 * Emits a standard prolog.
3687 */
3688static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3689{
3690#ifdef RT_ARCH_AMD64
3691 /*
3692 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3693 * reserving 64 bytes for stack variables plus 4 non-register argument
3694 * slots. Fixed register assignment: xBX = pReNative;
3695 *
3696 * Since we always do the same register spilling, we can use the same
3697 * unwind description for all the code.
3698 */
3699 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3700 AssertReturn(pbCodeBuf, UINT32_MAX);
3701 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3702 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3703 pbCodeBuf[off++] = 0x8b;
3704 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3705 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3706 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3707# ifdef RT_OS_WINDOWS
3708 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3709 pbCodeBuf[off++] = 0x8b;
3710 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3711 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3712 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3713# else
3714 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3715 pbCodeBuf[off++] = 0x8b;
3716 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3717# endif
3718 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3719 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3720 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3721 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3722 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3723 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3724 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3725 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3726
3727 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3728 X86_GREG_xSP,
3729 IEMNATIVE_FRAME_ALIGN_SIZE
3730 + IEMNATIVE_FRAME_VAR_SIZE
3731 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3732 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3733 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3734 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3735 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3736
3737#elif RT_ARCH_ARM64
3738 /*
3739 * We set up a stack frame exactly like on x86, only we have to push the
3740 * return address ourselves here. We save all non-volatile registers.
3741 */
3742 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3743 AssertReturn(pu32CodeBuf, UINT32_MAX);
3744
3745# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement of libunwind for JIT FDEs. Investigate further, as we have been
3746 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It is
3747 * definitely the dwarf stepping code, but until that is found it is very tedious to figure out whether it is
3748 * in any way conditional, so just emit this instruction now and hope for the best... */
3749 /* pacibsp */
3750 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3751# endif
3752
3753 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3754 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3755 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3756 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3757 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3758 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3759 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3760 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3761 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3762 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3763 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3764 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3765 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3766 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3767 /* Save the BP and LR (ret address) registers at the top of the frame. */
3768 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3769 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3770 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3771 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3772 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3773 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3774
3775 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3776 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3777
3778 /* mov r28, r0 */
3779 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3780 /* mov r27, r1 */
3781 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3782
3783#else
3784# error "port me"
3785#endif
3786 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3787 return off;
3788}
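/* Resulting AMD64 frame layout (sketch; the ARM64 frame mirrors it with
   x19-x28 plus BP/LR in the register save area):
        [rbp+08]  return address
        [rbp+00]  saved rbp
        [rbp-08]  saved rbx               (followed by rsi and rdi on Windows)
        ...       saved r12 thru r15
        ...       alignment padding + IEMNATIVE_FRAME_VAR_SIZE variable space
        [rsp]     stack + shadow argument slots
   The epilog above undoes this via 'lea rsp, [rbp - x]', the matching pops and
   a final leave/ret. */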
3789
3790
3791
3792/*********************************************************************************************************************************
3793* Emitters for IEM_MC_XXXX and the associated IEM_MC_XXXX recompiler definitions *
3794*********************************************************************************************************************************/
3795
3796#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3797 {
3798
3799/** We have to get to the end in recompilation mode, as otherwise we won't
3800 * generate code for all the IEM_MC_IF_XXX branches. */
3801#define IEM_MC_END() \
3802 } return off
3803
3804
3805/*
3806 * Standalone CImpl deferrals.
3807 */
3808
3809#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3810 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3811
3812
3813#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3814 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3815
3816DECLINLINE(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3817 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3818{
3819 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3820}
3821
3822
3823#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3824 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3825
3826DECLINLINE(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3827 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3828{
3829 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3830}
3831
3832
3833#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3834 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3835
3836DECLINLINE(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3837 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1, uint64_t uArg2)
3838{
3839 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3840}
3841
3842
3843/*
3844 * Advancing PC/RIP/EIP/IP.
3845 */
3846
3847#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
3848 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3849 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3850
3851/** Same as iemRegAddToRip64AndFinishingNoFlags. */
3852DECLINLINE(uint32_t) iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3853{
3854 /* Allocate a temporary PC register. */
3855 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3856 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3857
3858 /* Perform the addition and store the result. */
3859 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
3860 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3861
3862 /* Free but don't flush the PC register. */
3863 iemNativeRegFreeTmp(pReNative, idxPcReg);
3864
3865 return off;
3866}
3867
3868
3869#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
3870 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3871 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3872
3873/** Same as iemRegAddToEip32AndFinishingNoFlags. */
3874DECLINLINE(uint32_t) iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3875{
3876 /* Allocate a temporary PC register. */
3877 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3878 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3879
3880 /* Perform the addition and store the result. */
3881 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3882 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3883
3884 /* Free but don't flush the PC register. */
3885 iemNativeRegFreeTmp(pReNative, idxPcReg);
3886
3887 return off;
3888}
3889
3890
3891#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
3892 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3893 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3894
3895/** Same as iemRegAddToIp16AndFinishingNoFlags. */
3896DECLINLINE(uint32_t) iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3897{
3898 /* Allocate a temporary PC register. */
3899 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3900 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3901
3902 /* Perform the addition and store the result. */
3903 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3904 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
3905 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3906
3907 /* Free but don't flush the PC register. */
3908 iemNativeRegFreeTmp(pReNative, idxPcReg);
3909
3910 return off;
3911}
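/* Note on the 16-bit variant: the add is done as a 32-bit operation and the
   iemNativeEmitClear16UpGpr() call then wipes bits 63:16, so a carry out of IP
   (e.g. IP=0xffff plus cbInstr) wraps within 64K instead of leaking into the
   RIP value stored back into CPUMCTX. */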
3912
3913
3914/*
3915 * Changing PC/RIP/EIP/IP with a relative jump.
3916 */
3917
3918#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
3919 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
3920 (a_enmEffOpSize), pCallEntry->idxInstr); \
3921 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3922
3923
3924#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
3925 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
3926 IEMMODE_16BIT, pCallEntry->idxInstr); \
3927 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3928
3929#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
3930 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
3931 IEMMODE_64BIT, pCallEntry->idxInstr); \
3932 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3933
3934/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
3935 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
3936 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
3937DECLINLINE(uint32_t) iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3938 uint8_t cbInstr, int32_t offDisp, IEMMODE enmEffOpSize,
3939 uint8_t idxInstr)
3940{
3941 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
3942
3943 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
3944 off = iemNativeRegFlushPendingWrites(pReNative, off);
3945
3946 /* Allocate a temporary PC register. */
3947 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3948 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3949
3950 /* Perform the addition. */
3951 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
3952
3953 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
3954 {
3955 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
3956 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
3957 }
3958 else
3959 {
3960 /* Just truncate the result to 16-bit IP. */
3961 Assert(enmEffOpSize == IEMMODE_16BIT);
3962 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
3963 }
3964 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3965
3966 /* Free but don't flush the PC register. */
3967 iemNativeRegFreeTmp(pReNative, idxPcReg);
3968
3969 return off;
3970}
3971
3972
3973#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
3974 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
3975 (a_enmEffOpSize), pCallEntry->idxInstr); \
3976 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3977
3978#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
3979 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
3980 IEMMODE_16BIT, pCallEntry->idxInstr); \
3981 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3982
3983#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
3984 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
3985 IEMMODE_32BIT, pCallEntry->idxInstr); \
3986 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3987
3988/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
3989 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
3990 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
3991DECLINLINE(uint32_t) iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3992 uint8_t cbInstr, int32_t offDisp, IEMMODE enmEffOpSize,
3993 uint8_t idxInstr)
3994{
3995 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
3996
3997 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
3998 off = iemNativeRegFlushPendingWrites(pReNative, off);
3999
4000 /* Allocate a temporary PC register. */
4001 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4002 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
4003
4004 /* Perform the addition. */
4005 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4006
4007 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4008 if (enmEffOpSize == IEMMODE_16BIT)
4009 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4010
4011 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4012 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4013
4014 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4015
4016 /* Free but don't flush the PC register. */
4017 iemNativeRegFreeTmp(pReNative, idxPcReg);
4018
4019 return off;
4020}
4021
4022
4023#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4024 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
4025 AssertReturn(off != UINT32_MAX, UINT32_MAX)
4026
4027#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4028 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
4029 AssertReturn(off != UINT32_MAX, UINT32_MAX)
4030
4031#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4032 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
4033 AssertReturn(off != UINT32_MAX, UINT32_MAX)
4034
4035/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4036DECLINLINE(uint32_t) iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4037 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4038{
4039 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4040 off = iemNativeRegFlushPendingWrites(pReNative, off);
4041
4042 /* Allocate a temporary PC register. */
4043 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4044 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
4045
4046 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4047 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4048 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4049 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4050 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4051
4052 /* Free but don't flush the PC register. */
4053 iemNativeRegFreeTmp(pReNative, idxPcReg);
4054
4055 return off;
4056}
4057
4058
4059/*
4060 * Conditionals.
4061 */
4062
4063/**
4064 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4065 *
4066 * @returns Pointer to the condition stack entry on success, NULL on failure
4067 * (too many nestings)
4068 */
4069DECLINLINE(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4070{
4071 uint32_t const idxStack = pReNative->cCondDepth;
4072 AssertReturn(idxStack < RT_ELEMENTS(pReNative->aCondStack), NULL);
4073
4074 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4075 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4076
4077 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4078 pEntry->fInElse = false;
4079 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4080 AssertReturn(pEntry->idxLabelElse != UINT32_MAX, NULL);
4081 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4082 AssertReturn(pEntry->idxLabelEndIf != UINT32_MAX, NULL);
4083
4084 return pEntry;
4085}
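/* The condition stack entry created here corresponds to this generated code
   skeleton (sketch), which the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF
   emitters below fill in:
        <test>
        j<!cc>  .Lelse_N          ; emitted by the IEM_MC_IF_XXX worker
        <if-block>
        jmp     .Lendif_N         ; emitted by iemNativeEmitElse
   .Lelse_N:
        <else-block, optional>
   .Lendif_N:                     ; defined by iemNativeEmitEndIf
   with N being the uCondSeqNo used to keep the label data unique. */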
4086
4087
4088/**
4089 * Start of the if-block, snapshotting the register and variable state.
4090 */
4091DECLINLINE(void) iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock)
4092{
4093 Assert(offIfBlock != UINT32_MAX);
4094 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4095 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4096 Assert(!pEntry->fInElse);
4097
4098 /* Define the start of the IF block for disassembly. */
4099#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4100 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4101#else
4102 RT_NOREF(offIfBlock);
4103#endif
4104
4105 /* Copy the initial state so we can restore it in the 'else' block. */
4106 pEntry->InitialState = pReNative->Core;
4107}
4108
4109
4110#define IEM_MC_ELSE() } while (0); \
4111 off = iemNativeEmitElse(pReNative, off); \
4112 AssertReturn(off != UINT32_MAX, UINT32_MAX); \
4113 do {
4114
4115/** Emits code related to IEM_MC_ELSE. */
4116DECLINLINE(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4117{
4118 /* Check sanity and get the conditional stack entry. */
4119 Assert(off != UINT32_MAX);
4120 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4121 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4122 Assert(!pEntry->fInElse);
4123
4124 /* Jump to the endif */
4125 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4126
4127 /* Define the else label and enter the else part of the condition. */
4128 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4129 pEntry->fInElse = true;
4130
4131 /* Snapshot the core state so we can do a merge at the endif and restore
4132 the snapshot we took at the start of the if-block. */
4133 pEntry->IfFinalState = pReNative->Core;
4134 pReNative->Core = pEntry->InitialState;
4135
4136 return off;
4137}
4138
4139
4140#define IEM_MC_ENDIF() } while (0); \
4141 off = iemNativeEmitEndIf(pReNative, off); \
4142 AssertReturn(off != UINT32_MAX, UINT32_MAX)
4143
4144/** Emits code related to IEM_MC_ENDIF. */
4145DECLINLINE(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4146{
4147 /* Check sanity and get the conditional stack entry. */
4148 Assert(off != UINT32_MAX);
4149 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4150 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4151
4152 /*
4153 * Now we have to find common ground with the core state at the end of the
4154 * if-block (or the initial state if there is no else block). Use the smallest
4155 * common denominator and just drop anything that isn't the same in both states.
4156 */
4157 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4158 * which is why we're doing this at the end of the else-block.
4159 * But we'd need more info about future for that to be worth the effort. */
4160 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4161 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4162 {
4163 /* shadow guest stuff first. */
4164 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4165 if (fGstRegs)
4166 {
4167 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4168 do
4169 {
4170 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4171 fGstRegs &= ~RT_BIT_64(idxGstReg);
4172
4173 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4174 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4175 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4176 {
4177 Log12(("iemNativeEmitEndIf: dropping gst %#RX64 from hst %s\n",
4178 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4179 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4180 }
4181 } while (fGstRegs);
4182 }
4183 else
4184 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4185
4186 /* Check variables next. For now we must require them to be identical
4187 or stuff we can recreate. */
4188 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4189 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4190 if (fVars)
4191 {
4192 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4193 do
4194 {
4195 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4196 fVars &= ~RT_BIT_32(idxVar);
4197
4198 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4199 {
4200 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4201 continue;
4202 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4203 {
4204 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4205 if (idxHstReg != UINT8_MAX)
4206 {
4207 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4208 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4209 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4210 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4211 }
4212 continue;
4213 }
4214 }
4215 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4216 continue;
4217
4218 /* Irreconcilable, so drop it. */
4219 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4220 if (idxHstReg != UINT8_MAX)
4221 {
4222 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4223 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4224 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4225 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4226 }
4227 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4228 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4229 } while (fVars);
4230 }
4231
4232 /* Finally, check that the host register allocations matches. */
4233 AssertMsgReturn(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4234 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4235 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4236 UINT32_MAX);
4237 }
4238
4239 /*
4240 * Define the endif label and maybe the else one if we're still in the 'if' part.
4241 */
4242 if (!pEntry->fInElse)
4243 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4244 else
4245 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4246 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4247
4248    /* Pop the conditional stack. */
4249 pReNative->cCondDepth -= 1;
4250
4251 return off;
4252}
4253
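/*
 * Note: Rough usage sketch, for orientation only (simplified, not taken verbatim
 *       from any decoder file).  An MC block along the lines of
 *
 *           IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *               ...
 *           } IEM_MC_ELSE() {
 *               ...
 *           } IEM_MC_ENDIF();
 *
 *       maps onto one of the iemNativeEmitIfXxx emitters below (push a condition
 *       stack entry, emit the test and the jump to the else-label), while the
 *       else/endif parts define the labels and reconcile the core state as done
 *       by iemNativeEmitEndIf above.
 */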
4254
4255#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4256 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4257 AssertReturn(off != UINT32_MAX, UINT32_MAX); \
4258 do {
4259
4260/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4261DECLINLINE(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4262{
4263 PIEMNATIVECOND pEntry = iemNativeCondPushIf(pReNative);
4264 AssertReturn(pEntry, UINT32_MAX);
4265
4266 /* Get the eflags. */
4267 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4268 kIemNativeGstRegUse_ReadOnly);
4269 AssertReturn(idxEflReg != UINT8_MAX, UINT32_MAX);
4270
4271 /* Test and jump. */
4272 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4273
4274 /* Free but don't flush the EFlags register. */
4275 iemNativeRegFreeTmp(pReNative, idxEflReg);
4276
4277 /* Make a copy of the core state now as we start the if-block. */
4278 iemNativeCondStartIfBlock(pReNative, off);
4279
4280 return off;
4281}
4282
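/*
 * Note: For orientation, a rough sketch (not actual emitter output) of what the
 *       above amounts to on AMD64, assuming EFLAGS already sits in a host register:
 *           test    <eflags-reg>, fBitsInEfl
 *           jz      <else-label>
 *           ...                          ; if-block
 *       The exact instructions and registers are chosen by the emitter helpers and
 *       the register allocator; ARM64 ends up with something similar (a test
 *       followed by a conditional branch).
 */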
4283
4284#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4285 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4286 AssertReturn(off != UINT32_MAX, UINT32_MAX); \
4287 do {
4288
4289/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4290DECLINLINE(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4291{
4292 PIEMNATIVECOND pEntry = iemNativeCondPushIf(pReNative);
4293 AssertReturn(pEntry, UINT32_MAX);
4294
4295 /* Get the eflags. */
4296 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4297 kIemNativeGstRegUse_ReadOnly);
4298 AssertReturn(idxEflReg != UINT8_MAX, UINT32_MAX);
4299
4300 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4301 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4302
4303 /* Test and jump. */
4304    off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4305
4306 /* Free but don't flush the EFlags register. */
4307 iemNativeRegFreeTmp(pReNative, idxEflReg);
4308
4309 /* Make a copy of the core state now as we start the if-block. */
4310 iemNativeCondStartIfBlock(pReNative, off);
4311
4312 return off;
4313}
4314
4315
4316
4317/*********************************************************************************************************************************
4318* Builtin functions *
4319*********************************************************************************************************************************/
4320
4321/**
4322 * Built-in function that calls a C-implementation function taking zero arguments.
4323 */
4324static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
4325{
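    /* pCallEntry carries the C-impl function pointer in auParams[0] and the
       instruction length in auParams[1]; just forward these to the common C-impl
       call emitter with no additional arguments. */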
4326 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
4327 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
4328 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
4329}
4330
4331
4332
4333/*********************************************************************************************************************************
4334* The native code generator functions for each MC block. *
4335*********************************************************************************************************************************/
4336
4337
4338/*
4339 * Include g_apfnIemNativeRecompileFunctions and associated functions.
4340 *
4341 * This should probably live in its own file later, but let's see what the
4342 * compile times turn out to be first.
4343 */
4344#include "IEMNativeFunctions.cpp.h"
4345
4346
4347
4348/*********************************************************************************************************************************
4349* Recompiler Core. *
4350*********************************************************************************************************************************/
4351
4352
4353/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
4354static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
4355{
4356 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
4357 pDis->cbCachedInstr += cbMaxRead;
4358 RT_NOREF(cbMinRead);
4359 return VERR_NO_DATA;
4360}
4361
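/*
 * Note: The dummy reader above zero-fills whatever the disassembler requests beyond
 *       the prefetched opcode bytes and fails with VERR_NO_DATA, so a truncated
 *       instruction at the end of a range shows up as a disassembly failure rather
 *       than being decoded from unrelated memory.
 */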
4362
4363/**
4364 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
4365 * @returns pszBuf.
4366 * @param fFlags The flags.
4367 * @param pszBuf The output buffer.
4368 * @param cbBuf The output buffer size. At least 32 bytes.
4369 */
4370const char *iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf)
4371{
4372 Assert(cbBuf >= 32);
4373 static RTSTRTUPLE const s_aModes[] =
4374 {
4375 /* [00] = */ { RT_STR_TUPLE("16BIT") },
4376 /* [01] = */ { RT_STR_TUPLE("32BIT") },
4377 /* [02] = */ { RT_STR_TUPLE("!2!") },
4378 /* [03] = */ { RT_STR_TUPLE("!3!") },
4379 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
4380 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
4381 /* [06] = */ { RT_STR_TUPLE("!6!") },
4382 /* [07] = */ { RT_STR_TUPLE("!7!") },
4383 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
4384 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
4385 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
4386 /* [0b] = */ { RT_STR_TUPLE("!b!") },
4387 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
4388 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
4389 /* [0e] = */ { RT_STR_TUPLE("!e!") },
4390 /* [0f] = */ { RT_STR_TUPLE("!f!") },
4391 /* [10] = */ { RT_STR_TUPLE("!10!") },
4392 /* [11] = */ { RT_STR_TUPLE("!11!") },
4393 /* [12] = */ { RT_STR_TUPLE("!12!") },
4394 /* [13] = */ { RT_STR_TUPLE("!13!") },
4395 /* [14] = */ { RT_STR_TUPLE("!14!") },
4396 /* [15] = */ { RT_STR_TUPLE("!15!") },
4397 /* [16] = */ { RT_STR_TUPLE("!16!") },
4398 /* [17] = */ { RT_STR_TUPLE("!17!") },
4399 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
4400 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
4401 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
4402 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
4403 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
4404 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
4405 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
4406 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
4407 };
4408 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
4409 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
4410 unsigned off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
4411
4412 pszBuf[off++] = ' ';
4413 pszBuf[off++] = 'C';
4414 pszBuf[off++] = 'P';
4415 pszBuf[off++] = 'L';
4416 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
4417 Assert(off < 32);
4418
4419 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
4420
4421 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
4422 {
4423 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
4424 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
4425 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
4426 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
4427 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
4428 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
4429 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
4430 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
4431 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
4432 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
4433 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
4434 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
4435 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
4436 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
4437 };
4438 if (fFlags)
4439 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
4440 if (s_aFlags[i].fFlag & fFlags)
4441 {
4442 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
4443 pszBuf[off++] = ' ';
4444 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
4445 off += s_aFlags[i].cchName;
4446 fFlags &= ~s_aFlags[i].fFlag;
4447 if (!fFlags)
4448 break;
4449 }
4450 pszBuf[off] = '\0';
4451
4452 return pszBuf;
4453}
4454
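/*
 * Example (illustrative only): a 64-bit, ring-0 native TB with no other flags set
 * would be formatted as "64BIT CPL0 TYPE_NATIVE".
 */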
4455
4456void iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp)
4457{
4458 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
4459
4460 char szDisBuf[512];
4461 DISSTATE Dis;
4462 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
4463 uint32_t const cNative = pTb->Native.cInstructions;
4464 uint32_t offNative = 0;
4465#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4466 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
4467#endif
4468 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
4469 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
4470 : DISCPUMODE_64BIT;
4471#ifdef RT_ARCH_AMD64
4472 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
4473#elif defined(RT_ARCH_ARM64)
4474 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
4475#else
4476# error "Port me"
4477#endif
4478
4479 /*
4480 * Print TB info.
4481 */
4482 pHlp->pfnPrintf(pHlp,
4483 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
4484 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
4485 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
4486 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
4487#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4488 if (pDbgInfo && pDbgInfo->cEntries > 1)
4489 {
4490 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
4491
4492 /*
4493         * This disassembly is driven by the debug info, which follows the native
4494         * code and indicates where the next guest instruction starts, where the
4495         * labels are, and such things.
4496 */
4497 uint32_t idxThreadedCall = 0;
4498 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
4499 uint8_t idxRange = UINT8_MAX;
4500 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
4501 uint32_t offRange = 0;
4502 uint32_t offOpcodes = 0;
4503 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
4504 uint32_t const cDbgEntries = pDbgInfo->cEntries;
4505 uint32_t iDbgEntry = 1;
4506 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
4507
4508 while (offNative < cNative)
4509 {
4510 /* If we're at or have passed the point where the next chunk of debug
4511 info starts, process it. */
4512 if (offDbgNativeNext <= offNative)
4513 {
4514 offDbgNativeNext = UINT32_MAX;
4515 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
4516 {
4517 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
4518 {
4519 case kIemTbDbgEntryType_GuestInstruction:
4520 {
4521 /* Did the exec flag change? */
4522 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
4523 {
4524 pHlp->pfnPrintf(pHlp,
4525 " fExec change %#08x -> %#08x %s\n",
4526 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
4527 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
4528 szDisBuf, sizeof(szDisBuf)));
4529 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
4530 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
4531 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
4532 : DISCPUMODE_64BIT;
4533 }
4534
4535                        /* New opcode range?  We need to cope with a spurious debug info entry here
4536                           for cases where compilation was aborted before the opcode was recorded and
4537                           the actual instruction was translated into a threaded call.  This may happen
4538                           when we run out of ranges, or when some complicated interrupts/FFs are found
4539                           to be pending or similar.  So, we just deal with it here rather than in the
4540                           compiler code, as it is a lot simpler to do up here. */
4541 if ( idxRange == UINT8_MAX
4542 || idxRange >= cRanges
4543 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
4544 {
4545 idxRange += 1;
4546 if (idxRange < cRanges)
4547 offRange = 0;
4548 else
4549 continue;
4550 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
4551 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
4552 + (pTb->aRanges[idxRange].idxPhysPage == 0
4553 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
4554 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
4555 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
4556 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
4557 pTb->aRanges[idxRange].idxPhysPage);
4558 }
4559
4560 /* Disassemble the instruction. */
4561 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
4562 uint32_t cbInstr = 1;
4563 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
4564 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
4565 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
4566 if (RT_SUCCESS(rc))
4567 {
4568 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4569 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
4570 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
4571 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4572
4573 static unsigned const s_offMarker = 55;
4574 static char const s_szMarker[] = " ; <--- guest";
4575 if (cch < s_offMarker)
4576 {
4577 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
4578 cch = s_offMarker;
4579 }
4580 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
4581 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
4582
4583 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
4584 }
4585 else
4586 {
4587 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
4588 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
4589 cbInstr = 1;
4590 }
4591 GCPhysPc += cbInstr;
4592 offOpcodes += cbInstr;
4593 offRange += cbInstr;
4594 continue;
4595 }
4596
4597 case kIemTbDbgEntryType_ThreadedCall:
4598 pHlp->pfnPrintf(pHlp,
4599 " Call #%u to %s (%u args)\n",
4600 idxThreadedCall,
4601 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
4602 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall]);
4603 idxThreadedCall++;
4604 continue;
4605
4606 case kIemTbDbgEntryType_GuestRegShadowing:
4607 {
4608 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
4609 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
4610 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
4611 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
4612 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
4613 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
4614 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
4615 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
4616 else
4617 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
4618 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
4619 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
4620 continue;
4621 }
4622
4623 case kIemTbDbgEntryType_Label:
4624 {
4625 const char *pszName = "what_the_fudge";
4626 const char *pszComment = "";
4627 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
4628 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
4629 {
4630 case kIemNativeLabelType_Return:
4631 pszName = "Return";
4632 break;
4633 case kIemNativeLabelType_If:
4634 pszName = "If";
4635 fNumbered = true;
4636 break;
4637 case kIemNativeLabelType_Else:
4638 pszName = "Else";
4639 fNumbered = true;
4640 pszComment = " ; regs state restored pre-if-block";
4641 break;
4642 case kIemNativeLabelType_Endif:
4643 pszName = "Endif";
4644 fNumbered = true;
4645 break;
4646 case kIemNativeLabelType_NonZeroRetOrPassUp:
4647 pszName = "NonZeroRetOrPassUp";
4648 break;
4649 case kIemNativeLabelType_RaiseGp0:
4650 pszName = "RaiseGp0";
4651 break;
4652 case kIemNativeLabelType_Invalid:
4653 case kIemNativeLabelType_End:
4654 break;
4655 }
4656 if (fNumbered)
4657 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
4658 else
4659 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
4660 continue;
4661 }
4662
4663 case kIemTbDbgEntryType_NativeOffset:
4664 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
4665 Assert(offDbgNativeNext > offNative);
4666 break;
4667
4668 default:
4669 AssertFailed();
4670 }
4671 iDbgEntry++;
4672 break;
4673 }
4674 }
4675
4676 /*
4677 * Disassemble the next native instruction.
4678 */
4679 uint32_t cbInstr = sizeof(paNative[0]);
4680 int const rc = DISInstr(&paNative[offNative], enmHstCpuMode, &Dis, &cbInstr);
4681 if (RT_SUCCESS(rc))
4682 {
4683# if defined(RT_ARCH_AMD64)
4684 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
4685 {
4686 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
4687 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
4688 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)\n",
4689 &paNative[offNative], RT_LOWORD(uInfo), g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
4690 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)]);
4691 else
4692 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", &paNative[offNative], uInfo, uInfo);
4693 }
4694 else
4695# endif
4696 {
4697 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4698 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
4699 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
4700 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4701 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paNative[offNative], szDisBuf);
4702 }
4703 }
4704 else
4705 {
4706# if defined(RT_ARCH_AMD64)
4707 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
4708 &paNative[offNative], RT_MIN(cNative - offNative, 16), &paNative[offNative], rc);
4709# elif defined(RT_ARCH_ARM64)
4710 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
4711 &paNative[offNative], paNative[offNative], rc);
4712# else
4713# error "Port me"
4714# endif
4715 cbInstr = sizeof(paNative[0]);
4716 }
4717 offNative += cbInstr / sizeof(paNative[0]);
4718 }
4719 }
4720 else
4721#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
4722 {
4723 /*
4724 * No debug info, just disassemble the x86 code and then the native code.
4725 */
4726 /* The guest code. */
4727 for (unsigned i = 0; i < pTb->cRanges; i++)
4728 {
4729 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
4730 + (pTb->aRanges[i].idxPhysPage == 0
4731 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
4732 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
4733 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
4734 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
4735 unsigned off = pTb->aRanges[i].offOpcodes;
4736 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
4737 while (off < cbOpcodes)
4738 {
4739 uint32_t cbInstr = 1;
4740 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
4741 &pTb->pabOpcodes[off], cbOpcodes - off,
4742 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
4743 if (RT_SUCCESS(rc))
4744 {
4745 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4746 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
4747 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
4748 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4749 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
4750 GCPhysPc += cbInstr;
4751 off += cbInstr;
4752 }
4753 else
4754 {
4755 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
4756 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
4757 break;
4758 }
4759 }
4760 }
4761
4762 /* The native code: */
4763 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
4764        while (offNative < cNative)
4765 {
4766 uint32_t cbInstr = sizeof(paNative[0]);
4767 int const rc = DISInstr(&paNative[offNative], enmHstCpuMode, &Dis, &cbInstr);
4768 if (RT_SUCCESS(rc))
4769 {
4770# if defined(RT_ARCH_AMD64)
4771 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
4772 {
4773 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
4774 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
4775 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)\n",
4776 &paNative[offNative], RT_LOWORD(uInfo), g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
4777 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)]);
4778 else
4779 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", &paNative[offNative], uInfo, uInfo);
4780 }
4781 else
4782# endif
4783 {
4784 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4785 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
4786 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
4787 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4788 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paNative[offNative], szDisBuf);
4789 }
4790 }
4791 else
4792 {
4793# if defined(RT_ARCH_AMD64)
4794 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
4795 &paNative[offNative], RT_MIN(cNative - offNative, 16), &paNative[offNative], rc);
4796# elif defined(RT_ARCH_ARM64)
4797 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
4798 &paNative[offNative], paNative[offNative], rc);
4799# else
4800# error "Port me"
4801# endif
4802 cbInstr = sizeof(paNative[0]);
4803 }
4804 offNative += cbInstr / sizeof(paNative[0]);
4805 }
4806 }
4807}
4808
4809
4810/**
4811 * Recompiles the given threaded TB into a native one.
4812 *
4813 * In case of failure the translation block will be returned as-is.
4814 *
4815 * @returns pTb.
4816 * @param pVCpu The cross context virtual CPU structure of the calling
4817 * thread.
4818 * @param   pTb     The threaded translation block to recompile into a native one.
4819 */
4820PIEMTB iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
4821{
4822 /*
4823     * The first time thru we allocate the recompiler state; the other times we
4824     * just need to reset it before using it again.
4825 */
4826 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
4827 if (RT_LIKELY(pReNative))
4828 iemNativeReInit(pReNative, pTb);
4829 else
4830 {
4831 pReNative = iemNativeInit(pVCpu, pTb);
4832 AssertReturn(pReNative, pTb);
4833 }
4834
4835 /*
4836 * Emit prolog code (fixed).
4837 */
4838 uint32_t off = iemNativeEmitProlog(pReNative, 0);
4839 AssertReturn(off != UINT32_MAX, pTb);
4840
4841 /*
4842 * Convert the calls to native code.
4843 */
4844#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4845 int32_t iGstInstr = -1;
4846 uint32_t fExec = pTb->fFlags;
4847#endif
4848 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
4849 uint32_t cCallsLeft = pTb->Thrd.cCalls;
4850#ifdef LOG_ENABLED
4851 uint32_t const cCallsOrg = cCallsLeft;
4852#endif
4853 while (cCallsLeft-- > 0)
4854 {
4855 /*
4856 * Debug info and assembly markup.
4857 */
4858#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4859 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
4860 fExec = pCallEntry->auParams[0];
4861 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4862 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
4863 {
4864 if (iGstInstr < (int32_t)pTb->cInstructions)
4865 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
4866 else
4867 Assert(iGstInstr == pTb->cInstructions);
4868 iGstInstr = pCallEntry->idxInstr;
4869 }
4870 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction);
4871#endif
4872
4873#ifdef VBOX_STRICT
4874 off = iemNativeEmitMarker(pReNative, off, RT_MAKE_U32(pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->enmFunction));
4875 AssertReturn(off != UINT32_MAX, pTb);
4876#endif
4877 /*
4878 * Actual work.
4879 */
4880 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
4881 if (pfnRecom) /** @todo stats on this. */
4882 {
4883 //STAM_COUNTER_INC()
4884 off = pfnRecom(pReNative, off, pCallEntry);
4885 }
4886 else
4887 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
4888 AssertReturn(off != UINT32_MAX, pTb);
4889 Assert(pReNative->cCondDepth == 0);
4890
4891 /*
4892 * Advance.
4893 */
4894 pCallEntry++;
4895 }
4896
4897 /*
4898 * Emit the epilog code.
4899 */
4900 off = iemNativeEmitEpilog(pReNative, off);
4901 AssertReturn(off != UINT32_MAX, pTb);
4902
4903 /*
4904 * Generate special jump labels.
4905 */
4906 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
4907 {
4908 off = iemNativeEmitRaiseGp0(pReNative, off);
4909 AssertReturn(off != UINT32_MAX, pTb);
4910 }
4911
4912 /*
4913     * Make sure all labels have been defined.
4914 */
4915 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
4916#ifdef VBOX_STRICT
4917 uint32_t const cLabels = pReNative->cLabels;
4918 for (uint32_t i = 0; i < cLabels; i++)
4919 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
4920#endif
4921
4922 /*
4923 * Allocate executable memory, copy over the code we've generated.
4924 */
4925 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
4926 if (pTbAllocator->pDelayedFreeHead)
4927 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
4928
4929 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
4930 AssertReturn(paFinalInstrBuf, pTb);
4931 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
4932
4933 /*
4934 * Apply fixups.
4935 */
4936 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
4937 uint32_t const cFixups = pReNative->cFixups;
4938 for (uint32_t i = 0; i < cFixups; i++)
4939 {
4940 Assert(paFixups[i].off < off);
4941 Assert(paFixups[i].idxLabel < cLabels);
4942 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
4943 switch (paFixups[i].enmType)
4944 {
4945#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4946 case kIemNativeFixupType_Rel32:
4947 Assert(paFixups[i].off + 4 <= off);
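                /* Note: rel32 on x86 is relative to the end of the instruction; the
                         emitter is expected to account for that via offAddend
                         (typically -4). */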
4948 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
4949 continue;
4950
4951#elif defined(RT_ARCH_ARM64)
4952 case kIemNativeFixupType_RelImm19At5:
4953 {
4954 Assert(paFixups[i].off < off);
4955 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
4956 Assert(offDisp >= -262144 && offDisp < 262144);
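                /* The imm19 field occupies bits 5..23 and holds a signed displacement
                   in instruction units, so mask the (possibly negative) value to
                   19 bits before merging it into the instruction word. */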
4957                *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
4958 continue;
4959 }
4960#endif
4961 case kIemNativeFixupType_Invalid:
4962 case kIemNativeFixupType_End:
4963 break;
4964 }
4965 AssertFailed();
4966 }
4967
4968 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
4969
4970 /*
4971 * Convert the translation block.
4972 */
4973 //RT_BREAKPOINT();
4974 RTMemFree(pTb->Thrd.paCalls);
4975 pTb->Native.paInstructions = paFinalInstrBuf;
4976 pTb->Native.cInstructions = off;
4977 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
4978#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4979 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
4980 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
4981#endif
4982
4983 Assert(pTbAllocator->cThreadedTbs > 0);
4984 pTbAllocator->cThreadedTbs -= 1;
4985 pTbAllocator->cNativeTbs += 1;
4986 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
4987
4988#ifdef LOG_ENABLED
4989 /*
4990 * Disassemble to the log if enabled.
4991 */
4992 if (LogIs3Enabled())
4993 {
4994 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
4995 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
4996 }
4997#endif
4998
4999 return pTb;
5000}
5001