VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 101504

Last change on this file since 101504 was 101490, checked in by vboxsync, 14 months ago

VMM/IEM: Added missing guest register shadow copy flushing. nop sequence + loop works now. Removed some debugging stuff. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 101490 2023-10-18 09:00:11Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): ...
18 * - Level 12 (Log12): Register allocator
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include "IEMInternal.h"
54#include <VBox/vmm/vmcc.h>
55#include <VBox/log.h>
56#include <VBox/err.h>
57#include <VBox/param.h>
58#include <iprt/assert.h>
59#include <iprt/heap.h>
60#include <iprt/mem.h>
61#include <iprt/string.h>
62#if defined(RT_ARCH_AMD64)
63# include <iprt/x86.h>
64#elif defined(RT_ARCH_ARM64)
65# include <iprt/armv8.h>
66#endif
67
68#ifdef RT_OS_WINDOWS
69# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
70extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
71extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
72#else
73# include <iprt/formats/dwarf.h>
74# if defined(RT_OS_DARWIN)
75# include <libkern/OSCacheControl.h>
76# define IEMNATIVE_USE_LIBUNWIND
77extern "C" void __register_frame(const void *pvFde);
78extern "C" void __deregister_frame(const void *pvFde);
79# else
80# ifdef DEBUG_bird /** @todo not thread safe yet */
81# define IEMNATIVE_USE_GDB_JIT
82# endif
83# ifdef IEMNATIVE_USE_GDB_JIT
84# include <iprt/critsect.h>
85# include <iprt/once.h>
86# include <iprt/formats/elf64.h>
87# endif
88extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
89extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
90# endif
91#endif
92
93#include "IEMInline.h"
94#include "IEMThreadedFunctions.h"
95#include "IEMN8veRecompiler.h"
96#include "IEMNativeFunctions.h"
97
98
99/*
100 * Narrow down configs here to avoid wasting time on unused configs.
101 * Note! Same checks in IEMAllThrdRecompiler.cpp.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117/*********************************************************************************************************************************
118* Executable Memory Allocator *
119*********************************************************************************************************************************/
120/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
121 * Use an alternative chunk sub-allocator that does not store internal data
122 * in the chunk itself.
123 *
124 * Using RTHeapSimple is not practical on newer darwin systems where
125 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
126 * memory. We would have to change the protection of the whole chunk for
127 * every call to RTHeapSimple, which would be rather expensive.
128 *
129 * This alternative implementation restricts page protection modifications
130 * to the pages backing the executable memory we just allocated.
131 */
132#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
133/** The chunk sub-allocation unit size in bytes. */
134#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
135/** The chunk sub-allocation unit size as a shift factor. */
136#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
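/* For illustration: a request of, say, 200 bytes is rounded up to
   RT_ALIGN_32(200, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) = 256 bytes, i.e.
   (200 + 127) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 2 sub-allocation units,
   so the smallest possible allocation is one 128 byte unit. */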
137
138#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
139# ifdef IEMNATIVE_USE_GDB_JIT
140# define IEMNATIVE_USE_GDB_JIT_ET_DYN
141
142/** GDB JIT: Code entry. */
143typedef struct GDBJITCODEENTRY
144{
145 struct GDBJITCODEENTRY *pNext;
146 struct GDBJITCODEENTRY *pPrev;
147 uint8_t *pbSymFile;
148 uint64_t cbSymFile;
149} GDBJITCODEENTRY;
150
151/** GDB JIT: Actions. */
152typedef enum GDBJITACTIONS : uint32_t
153{
154 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
155} GDBJITACTIONS;
156
157/** GDB JIT: Descriptor. */
158typedef struct GDBJITDESCRIPTOR
159{
160 uint32_t uVersion;
161 GDBJITACTIONS enmAction;
162 GDBJITCODEENTRY *pRelevant;
163 GDBJITCODEENTRY *pHead;
164 /** Our addition: */
165 GDBJITCODEENTRY *pTail;
166} GDBJITDESCRIPTOR;
167
168/** GDB JIT: Our simple symbol file data. */
169typedef struct GDBJITSYMFILE
170{
171 Elf64_Ehdr EHdr;
172# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
173 Elf64_Shdr aShdrs[5];
174# else
175 Elf64_Shdr aShdrs[6];
176 Elf64_Phdr aPhdrs[3];
177# endif
178 /** The dwarf ehframe data for the chunk. */
179 uint8_t abEhFrame[512];
180 char szzStrTab[128];
181 Elf64_Sym aSymbols[1];
182# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
183 Elf64_Dyn aDyn[6];
184# endif
185} GDBJITSYMFILE;
186
187extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
188extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
189
190/** Init once for g_IemNativeGdbJitLock. */
191static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
192/** Critical section protecting the GDB JIT descriptor list below. */
193static RTCRITSECT g_IemNativeGdbJitLock;
194
195/** GDB reads the info here. */
196GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
197
198/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
199DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
200{
201 ASMNopPause();
202}
203
204/** @callback_method_impl{FNRTONCE} */
205static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
206{
207 RT_NOREF(pvUser);
208 return RTCritSectInit(&g_IemNativeGdbJitLock);
209}
210
211
212# endif /* IEMNATIVE_USE_GDB_JIT */
213
214/**
215 * Per-chunk unwind info for non-windows hosts.
216 */
217typedef struct IEMEXECMEMCHUNKEHFRAME
218{
219# ifdef IEMNATIVE_USE_LIBUNWIND
220 /** The offset of the FDA into abEhFrame. */
221 uintptr_t offFda;
222# else
223 /** 'struct object' storage area. */
224 uint8_t abObject[1024];
225# endif
226# ifdef IEMNATIVE_USE_GDB_JIT
227# if 0
228 /** The GDB JIT 'symbol file' data. */
229 GDBJITSYMFILE GdbJitSymFile;
230# endif
231 /** The GDB JIT list entry. */
232 GDBJITCODEENTRY GdbJitEntry;
233# endif
234 /** The dwarf ehframe data for the chunk. */
235 uint8_t abEhFrame[512];
236} IEMEXECMEMCHUNKEHFRAME;
237/** Pointer to the per-chunk unwind info for non-windows hosts. */
238typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
239#endif
240
241
242/**
243 * A chunk of executable memory.
244 */
245typedef struct IEMEXECMEMCHUNK
246{
247#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
248 /** Number of free items in this chunk. */
249 uint32_t cFreeUnits;
250 /** Hint where to start searching for free space in the allocation bitmap. */
251 uint32_t idxFreeHint;
252#else
253 /** The heap handle. */
254 RTHEAPSIMPLE hHeap;
255#endif
256 /** Pointer to the chunk. */
257 void *pvChunk;
258#ifdef IN_RING3
259 /**
260 * Pointer to the unwind information.
261 *
262 * This is used during C++ throw and longjmp (windows and probably most other
263 * platforms). Some debuggers (windbg) make use of it as well.
264 *
265 * Windows: This is allocated from hHeap on windows because (at least for
266 * AMD64) the UNWIND_INFO structure address in the
267 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
268 *
269 * Others: Allocated from the regular heap to avoid unnecessary executable data
270 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
271 void *pvUnwindInfo;
272#elif defined(IN_RING0)
273 /** Allocation handle. */
274 RTR0MEMOBJ hMemObj;
275#endif
276} IEMEXECMEMCHUNK;
277/** Pointer to a memory chunk. */
278typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
279
280
281/**
282 * Executable memory allocator for the native recompiler.
283 */
284typedef struct IEMEXECMEMALLOCATOR
285{
286 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
287 uint32_t uMagic;
288
289 /** The chunk size. */
290 uint32_t cbChunk;
291 /** The maximum number of chunks. */
292 uint32_t cMaxChunks;
293 /** The current number of chunks. */
294 uint32_t cChunks;
295 /** Hint where to start looking for available memory. */
296 uint32_t idxChunkHint;
297 /** Statistics: Current number of allocations. */
298 uint32_t cAllocations;
299
300 /** The total amount of memory available. */
301 uint64_t cbTotal;
302 /** Total amount of free memory. */
303 uint64_t cbFree;
304 /** Total amount of memory allocated. */
305 uint64_t cbAllocated;
306
307#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
308 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
309 *
310 * Since the chunk size is a power of two and the minimum chunk size is a lot
311 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
312 * require a whole number of uint64_t elements in the allocation bitmap. So,
313 * for the sake of simplicity/laziness, the bitmaps for all chunks are
314 * allocated as one continuous block (a sizing example follows the structure). */
315 uint64_t *pbmAlloc;
316 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
317 uint32_t cUnitsPerChunk;
318 /** Number of bitmap elements per chunk (for quickly locating the bitmap
319 * portion corresponding to a chunk). */
320 uint32_t cBitmapElementsPerChunk;
321#else
322 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
323 * @{ */
324 /** The size of the heap internal block header. This is used to adjust the
325 * request memory size to make sure there is exactly enough room for a header at
326 * the end of the blocks we allocate before the next 64 byte alignment line. */
327 uint32_t cbHeapBlockHdr;
328 /** The size of the initial heap allocation required to make sure the first
329 * allocation is correctly aligned. */
330 uint32_t cbHeapAlignTweak;
331 /** The alignment tweak allocation address. */
332 void *pvAlignTweak;
333 /** @} */
334#endif
335
336#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
337 /** Pointer to the array of unwind info running parallel to aChunks (same
338 * allocation as this structure, located after the bitmaps).
339 * (For Windows, the structures must reside in 32-bit RVA distance to the
340 * actual chunk, so they are allocated off the chunk.) */
341 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
342#endif
343
344 /** The allocation chunks. */
345 RT_FLEXIBLE_ARRAY_EXTENSION
346 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
347} IEMEXECMEMALLOCATOR;
348/** Pointer to an executable memory allocator. */
349typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
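/* Illustrative sizing, assuming the 64 MB default chunk size picked by
 * iemExecMemAllocatorInit for large cbMax values: 64M / 128 byte units
 * = 524288 units per chunk, needing 524288 / 64 = 8192 uint64_t bitmap
 * elements (64 KB of bitmap) per chunk. */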
350
351/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
352#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
353
354
355static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator);
356
357
358/**
359 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
360 * the heap statistics.
361 */
362static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
363 uint32_t cbReq, uint32_t idxChunk)
364{
365 pExecMemAllocator->cAllocations += 1;
366 pExecMemAllocator->cbAllocated += cbReq;
367#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
368 pExecMemAllocator->cbFree -= cbReq;
369#else
370 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
371#endif
372 pExecMemAllocator->idxChunkHint = idxChunk;
373
374#ifdef RT_OS_DARWIN
375 /*
376 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
377 * on darwin. So, we mark the pages returned as read+write after alloc and
378 * expect the caller to call iemExecMemAllocatorReadyForUse when done
379 * writing to the allocation.
380 *
381 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
382 * for details.
383 */
384 /** @todo detect if this is necessary... it wasn't required on 10.15 or
385 * whatever older version it was. */
386 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
387 AssertRC(rc);
388#endif
389
390 return pvRet;
391}
392
393
394#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
395static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
396 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
397{
398 /*
399 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
400 */
401 Assert(!(cToScan & 63));
402 Assert(!(idxFirst & 63));
403 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
404 pbmAlloc += idxFirst / 64;
405
406 /*
407 * Scan the bitmap for cReqUnits consecutive clear bits.
408 */
409 /** @todo This can probably be done more efficiently for non-x86 systems. */
410 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
411 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
412 {
413 uint32_t idxAddBit = 1;
414 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
415 idxAddBit++;
416 if (idxAddBit >= cReqUnits)
417 {
418 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
419
420 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
421 pChunk->cFreeUnits -= cReqUnits;
422 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
423
424 void * const pvRet = (uint8_t *)pChunk->pvChunk
425 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
426
427 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
428 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
429 }
430
431 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
432 }
433 return NULL;
434}
435#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
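/* First-fit illustration of the scan above, assuming cReqUnits = 2 and
 * pbmAlloc[0] = 0x17 (bits 0-2 and 4 allocated): ASMBitFirstClear returns
 * bit 3, but bit 4 is set so the run is too short; ASMBitNextClear then
 * returns bit 5, bits 5 and 6 are both clear, so bits 5..6 get marked and
 * the caller receives pvChunk + (idxFirst + 5) * 128 bytes. */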
436
437
438static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
439{
440#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
441 /*
442 * Figure out how much to allocate.
443 */
444 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
445 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
446 {
447 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
448 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
449 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
450 {
451 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
452 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
453 if (pvRet)
454 return pvRet;
455 }
456 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
457 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
458 cReqUnits, idxChunk);
459 }
460#else
461 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
462 if (pvRet)
463 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
464#endif
465 return NULL;
466
467}
468
469
470/**
471 * Allocates @a cbReq bytes of executable memory.
472 *
473 * @returns Pointer to the memory, NULL if out of memory or other problem
474 * encountered.
475 * @param pVCpu The cross context virtual CPU structure of the calling
476 * thread.
477 * @param cbReq How many bytes are required.
478 */
479static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
480{
481 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
482 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
483 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
484
485 /*
486 * Adjust the request size so it'll fit the allocator alignment/whatnot.
487 *
488 * For the RTHeapSimple allocator this means following the logic described
489 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
490 * existing chunks if we think we've got sufficient free memory around.
491 *
492 * For the alternative one we just align it up to a whole unit size.
493 */
494#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
495 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
496#else
497 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
498#endif
499 if (cbReq <= pExecMemAllocator->cbFree)
500 {
501 uint32_t const cChunks = pExecMemAllocator->cChunks;
502 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
503 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
504 {
505 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
506 if (pvRet)
507 return pvRet;
508 }
509 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
510 {
511 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
512 if (pvRet)
513 return pvRet;
514 }
515 }
516
517 /*
518 * Can we grow it with another chunk?
519 */
520 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
521 {
522 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
523 AssertLogRelRCReturn(rc, NULL);
524
525 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
526 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
527 if (pvRet)
528 return pvRet;
529 AssertFailed();
530 }
531
532 /* What now? Prune native translation blocks from the cache? */
533 AssertFailed();
534 return NULL;
535}
536
537
538/** This is a hook that we may need later for changing memory protection back
539 * to readonly+exec */
540static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
541{
542#ifdef RT_OS_DARWIN
543 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
544 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
545 AssertRC(rc); RT_NOREF(pVCpu);
546
547 /*
548 * Flush the instruction cache:
549 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
550 */
551 /* sys_dcache_flush(pv, cb); - not necessary */
552 sys_icache_invalidate(pv, cb);
553#else
554 RT_NOREF(pVCpu, pv, cb);
555#endif
556}
557
558
559/**
560 * Frees executable memory.
561 */
562void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
563{
564 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
565 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
566 Assert(pv);
567#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
568 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
569#else
570 Assert(!((uintptr_t)pv & 63));
571#endif
572
573 /* Align the size as we did when allocating the block. */
574#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
575 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
576#else
577 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
578#endif
579
580 /* Free it / assert sanity. */
581#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
582 uint32_t const cChunks = pExecMemAllocator->cChunks;
583 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
584 bool fFound = false;
585 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
586 {
587 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
588 fFound = offChunk < cbChunk;
589 if (fFound)
590 {
591#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
592 uint32_t const idxFirst = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
593 uint32_t const cReqUnits = cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
594
595 /* Check that it's valid and free it. */
596 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
597 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
598 for (uint32_t i = 1; i < cReqUnits; i++)
599 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
600 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
601
602 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
603 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
604
605 /* Update the stats. */
606 pExecMemAllocator->cbAllocated -= cb;
607 pExecMemAllocator->cbFree += cb;
608 pExecMemAllocator->cAllocations -= 1;
609 return;
610#else
611 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
612 break;
613#endif
614 }
615 }
616# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
617 AssertFailed();
618# else
619 Assert(fFound);
620# endif
621#endif
622
623#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
624 /* Update stats while cb is freshly calculated. */
625 pExecMemAllocator->cbAllocated -= cb;
626 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
627 pExecMemAllocator->cAllocations -= 1;
628
629 /* Free it. */
630 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
631#endif
632}
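/*
 * Rough sketch of the expected life cycle of an allocation; the buffer and
 * size names here are made up, the real callers live in the recompiler code:
 *
 *      uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbNative);
 *      if (pbCode)
 *      {
 *          memcpy(pbCode, pabNativeInstrs, cbNative);                // write while still RW
 *          iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbNative);  // flip to RX, flush icache (darwin)
 *          ...execute the translation block...
 *          iemExecMemAllocatorFree(pVCpu, pbCode, cbNative);         // when the TB gets pruned
 *      }
 */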
633
634
635
636#ifdef IN_RING3
637# ifdef RT_OS_WINDOWS
638
639/**
640 * Initializes the unwind info structures for windows hosts.
641 */
642static int
643iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
644{
645 /*
646 * The AMD64 unwind opcodes.
647 *
648 * This is a program that starts with RSP after a RET instruction that
649 * ends up in recompiled code, and the operations we describe here will
650 * restore all non-volatile registers and bring RSP back to where our
651 * RET address is. This means it's reverse order from what happens in
652 * the prologue.
653 *
654 * Note! Using a frame register approach here, both because we have one
655 * and mainly because the UWOP_ALLOC_LARGE argument values
656 * would be a pain to write initializers for. On the positive
657 * side, we're impervious to changes in the stack variable
658 * area and can deal with dynamic stack allocations if necessary.
659 */
660 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
661 {
662 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
663 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
664 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
665 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
666 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
667 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
668 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
669 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
670 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
671 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
672 };
673 union
674 {
675 IMAGE_UNWIND_INFO Info;
676 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
677 } s_UnwindInfo =
678 {
679 {
680 /* .Version = */ 1,
681 /* .Flags = */ 0,
682 /* .SizeOfProlog = */ 16, /* whatever */
683 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
684 /* .FrameRegister = */ X86_GREG_xBP,
685 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
686 }
687 };
688 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
689 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
690
691 /*
692 * Calc how much space we need and allocate it off the exec heap.
693 */
694 unsigned const cFunctionEntries = 1;
695 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
696 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
697# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
698 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
699 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
700 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
701# else
702 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
703 - pExecMemAllocator->cbHeapBlockHdr;
704 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
705 32 /*cbAlignment*/);
706# endif
707 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
708 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
709
710 /*
711 * Initialize the structures.
712 */
713 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
714
715 paFunctions[0].BeginAddress = 0;
716 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
717 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
718
719 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
720 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
721
722 /*
723 * Register it.
724 */
725 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
726 AssertReturn(fRet, VERR_INTERNAL_ERROR_5); /* Nothing to clean up on failure, since it's within the chunk itself. */
727
728 return VINF_SUCCESS;
729}
730
731
732# else /* !RT_OS_WINDOWS */
733
734/**
735 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
736 */
737DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
738{
739 if (iValue >= 64)
740 {
741 Assert(iValue < 0x2000);
742 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
743 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
744 }
745 else if (iValue >= 0)
746 *Ptr.pb++ = (uint8_t)iValue;
747 else if (iValue > -64)
748 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
749 else
750 {
751 Assert(iValue > -0x2000);
752 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
753 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
754 }
755 return Ptr;
756}
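/* Examples: iemDwarfPutLeb128(Ptr, -8)  emits the single byte 0x78;
 *           iemDwarfPutLeb128(Ptr, 300) emits 0xac 0x02 (300 = 0x2c + 2*128). */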
757
758
759/**
760 * Emits an ULEB128 encoded value (up to 64-bit wide).
761 */
762DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
763{
764 while (uValue >= 0x80)
765 {
766 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
767 uValue >>= 7;
768 }
769 *Ptr.pb++ = (uint8_t)uValue;
770 return Ptr;
771}
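/* Example: iemDwarfPutUleb128(Ptr, 624485) emits 0xe5 0x8e 0x26, the classic
 * DWARF spec example (624485 = 0x65 + 0x0e*128 + 0x26*128*128). */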
772
773
774/**
775 * Emits a CFA rule as register @a uReg + offset @a off.
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
778{
779 *Ptr.pb++ = DW_CFA_def_cfa;
780 Ptr = iemDwarfPutUleb128(Ptr, uReg);
781 Ptr = iemDwarfPutUleb128(Ptr, off);
782 return Ptr;
783}
784
785
786/**
787 * Emits a register (@a uReg) save location:
788 * CFA + @a off * data_alignment_factor
789 */
790DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
791{
792 if (uReg < 0x40)
793 *Ptr.pb++ = DW_CFA_offset | uReg;
794 else
795 {
796 *Ptr.pb++ = DW_CFA_offset_extended;
797 Ptr = iemDwarfPutUleb128(Ptr, uReg);
798 }
799 Ptr = iemDwarfPutUleb128(Ptr, off);
800 return Ptr;
801}
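/* Example, assuming the standard DWARF register number 6 for RBP and the
 * data alignment factor of -8 used below: iemDwarfPutCfaOffset(Ptr, 6, 2)
 * emits 0x86 0x02 (DW_CFA_offset|6, ULEB128(2)), i.e. RBP was saved at
 * CFA + 2 * -8 = CFA - 16. */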
802
803
804# if 0 /* unused */
805/**
806 * Emits a register (@a uReg) save location, using signed offset:
807 * CFA + @a offSigned * data_alignment_factor
808 */
809DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
810{
811 *Ptr.pb++ = DW_CFA_offset_extended_sf;
812 Ptr = iemDwarfPutUleb128(Ptr, uReg);
813 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
814 return Ptr;
815}
816# endif
817
818
819/**
820 * Initializes the unwind info section for non-windows hosts.
821 */
822static int
823iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
824{
825 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
826 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
827
828 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
829
830 /*
831 * Generate the CIE first.
832 */
833# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
834 uint8_t const iDwarfVer = 3;
835# else
836 uint8_t const iDwarfVer = 4;
837# endif
838 RTPTRUNION const PtrCie = Ptr;
839 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
840 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
841 *Ptr.pb++ = iDwarfVer; /* DWARF version */
842 *Ptr.pb++ = 0; /* Augmentation. */
843 if (iDwarfVer >= 4)
844 {
845 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
846 *Ptr.pb++ = 0; /* Segment selector size. */
847 }
848# ifdef RT_ARCH_AMD64
849 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
850# else
851 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
852# endif
853 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
854# ifdef RT_ARCH_AMD64
855 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
856# elif defined(RT_ARCH_ARM64)
857 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
858# else
859# error "port me"
860# endif
861 /* Initial instructions: */
862# ifdef RT_ARCH_AMD64
863 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
864 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
865 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
866 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
867 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
868 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
869 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
870 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
871# elif defined(RT_ARCH_ARM64)
872# if 1
873 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
874# else
875 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
876# endif
877 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
878 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
879 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
880 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
881 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
882 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
883 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
884 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
885 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
886 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
887 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
888 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
889 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
890 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
891# else
892# error "port me"
893# endif
894 while ((Ptr.u - PtrCie.u) & 3)
895 *Ptr.pb++ = DW_CFA_nop;
896 /* Finalize the CIE size. */
897 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
898
899 /*
900 * Generate an FDE for the whole chunk area.
901 */
902# ifdef IEMNATIVE_USE_LIBUNWIND
903 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
904# endif
905 RTPTRUNION const PtrFde = Ptr;
906 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
907 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
908 Ptr.pu32++;
909 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
910 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
911# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
912 *Ptr.pb++ = DW_CFA_nop;
913# endif
914 while ((Ptr.u - PtrFde.u) & 3)
915 *Ptr.pb++ = DW_CFA_nop;
916 /* Finalize the FDE size. */
917 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
918
919 /* Terminator entry. */
920 *Ptr.pu32++ = 0;
921 *Ptr.pu32++ = 0; /* just to be sure... */
922 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
923
924 /*
925 * Register it.
926 */
927# ifdef IEMNATIVE_USE_LIBUNWIND
928 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
929# else
930 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
931 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
932# endif
933
934# ifdef IEMNATIVE_USE_GDB_JIT
935 /*
936 * Now for telling GDB about this (experimental).
937 *
938 * This seems to work best with ET_DYN.
939 */
940 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
941# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
942 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
943 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
944# else
945 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
946 - pExecMemAllocator->cbHeapBlockHdr;
947 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
948# endif
949 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
950 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
951
952 RT_ZERO(*pSymFile);
953 /* The ELF header: */
954 pSymFile->EHdr.e_ident[0] = ELFMAG0;
955 pSymFile->EHdr.e_ident[1] = ELFMAG1;
956 pSymFile->EHdr.e_ident[2] = ELFMAG2;
957 pSymFile->EHdr.e_ident[3] = ELFMAG3;
958 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
959 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
960 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
961 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
962# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
963 pSymFile->EHdr.e_type = ET_DYN;
964# else
965 pSymFile->EHdr.e_type = ET_REL;
966# endif
967# ifdef RT_ARCH_AMD64
968 pSymFile->EHdr.e_machine = EM_AMD64;
969# elif defined(RT_ARCH_ARM64)
970 pSymFile->EHdr.e_machine = EM_AARCH64;
971# else
972# error "port me"
973# endif
974 pSymFile->EHdr.e_version = 1; /*?*/
975 pSymFile->EHdr.e_entry = 0;
976# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
977 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
978# else
979 pSymFile->EHdr.e_phoff = 0;
980# endif
981 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
982 pSymFile->EHdr.e_flags = 0;
983 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
984# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
985 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
986 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
987# else
988 pSymFile->EHdr.e_phentsize = 0;
989 pSymFile->EHdr.e_phnum = 0;
990# endif
991 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
992 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
993 pSymFile->EHdr.e_shstrndx = 0; /* set later */
994
995 uint32_t offStrTab = 0;
996#define APPEND_STR(a_szStr) do { \
997 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
998 offStrTab += sizeof(a_szStr); \
999 } while (0)
1000 /* Section header #0: NULL */
1001 unsigned i = 0;
1002 APPEND_STR("");
1003 RT_ZERO(pSymFile->aShdrs[i]);
1004 i++;
1005
1006 /* Section header: .eh_frame */
1007 pSymFile->aShdrs[i].sh_name = offStrTab;
1008 APPEND_STR(".eh_frame");
1009 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1010 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1011# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1012 pSymFile->aShdrs[i].sh_offset
1013 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1014# else
1015 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1016 pSymFile->aShdrs[i].sh_offset = 0;
1017# endif
1018
1019 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1020 pSymFile->aShdrs[i].sh_link = 0;
1021 pSymFile->aShdrs[i].sh_info = 0;
1022 pSymFile->aShdrs[i].sh_addralign = 1;
1023 pSymFile->aShdrs[i].sh_entsize = 0;
1024 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1025 i++;
1026
1027 /* Section header: .shstrtab */
1028 unsigned const iShStrTab = i;
1029 pSymFile->EHdr.e_shstrndx = iShStrTab;
1030 pSymFile->aShdrs[i].sh_name = offStrTab;
1031 APPEND_STR(".shstrtab");
1032 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1033 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1035 pSymFile->aShdrs[i].sh_offset
1036 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1037# else
1038 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1039 pSymFile->aShdrs[i].sh_offset = 0;
1040# endif
1041 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1042 pSymFile->aShdrs[i].sh_link = 0;
1043 pSymFile->aShdrs[i].sh_info = 0;
1044 pSymFile->aShdrs[i].sh_addralign = 1;
1045 pSymFile->aShdrs[i].sh_entsize = 0;
1046 i++;
1047
1048 /* Section header: .symbols */
1049 pSymFile->aShdrs[i].sh_name = offStrTab;
1050# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1051 APPEND_STR(".dynsym");
1052 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1053# else
1054 APPEND_STR(".symtab");
1055 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1056# endif
1057 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1058 pSymFile->aShdrs[i].sh_offset
1059 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1060 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1061 pSymFile->aShdrs[i].sh_link = iShStrTab;
1062 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1063 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1064 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1065 i++;
1066
1067# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1068 /* Section header: .dynamic */
1069 pSymFile->aShdrs[i].sh_name = offStrTab;
1070 APPEND_STR(".dynamic");
1071 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1072 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1073 pSymFile->aShdrs[i].sh_offset
1074 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1075 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1076 pSymFile->aShdrs[i].sh_link = iShStrTab;
1077 pSymFile->aShdrs[i].sh_info = 0;
1078 pSymFile->aShdrs[i].sh_addralign = 1;
1079 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1080 i++;
1081# endif
1082
1083 /* Section header: .text */
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".text");
1086 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1103
1104# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1105 /*
1106 * The program headers:
1107 */
1108 /* Headers and whatnot up to .dynamic: */
1109 i = 0;
1110 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1111 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1112 pSymFile->aPhdrs[i].p_offset
1113 = pSymFile->aPhdrs[i].p_vaddr
1114 = pSymFile->aPhdrs[i].p_paddr = 0;
1115 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1116 = pSymFile->aPhdrs[i].p_memsz = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1117 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1118 i++;
1119 /* .dynamic */
1120 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1121 pSymFile->aPhdrs[i].p_flags = PF_R;
1122 pSymFile->aPhdrs[i].p_offset
1123 = pSymFile->aPhdrs[i].p_vaddr
1124 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1125 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1126 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1127 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1128 i++;
1129 /* The rest of the chunk. */
1130 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1131 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1132 pSymFile->aPhdrs[i].p_offset
1133 = pSymFile->aPhdrs[i].p_vaddr
1134 = pSymFile->aPhdrs[i].p_paddr = sizeof(GDBJITSYMFILE);
1135 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1136 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1137 pSymFile->aPhdrs[i].p_align = 1;
1138 i++;
1139
1140 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1141
1142 /* The dynamic section: */
1143 i = 0;
1144 pSymFile->aDyn[i].d_tag = DT_SONAME;
1145 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1146 APPEND_STR("iem-native.so");
1147 i++;
1148 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1149 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1150 i++;
1151 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1152 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1153 i++;
1154 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1155 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1156 i++;
1157 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1158 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aSymbols[0]);
1159 i++;
1160 pSymFile->aDyn[i].d_tag = DT_NULL;
1161 i++;
1162 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1163# endif
1164
1165 /* Symbol table: */
1166 i = 0;
1167 pSymFile->aSymbols[i].st_name = offStrTab;
1168 APPEND_STR("iem_exec_chunk");
1169 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1170 pSymFile->aSymbols[i].st_value = (uintptr_t)pvChunk;
1171 pSymFile->aSymbols[i].st_size = pExecMemAllocator->cbChunk;
1172 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FUNC);
1173 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1174 i++;
1175 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1176 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1177
1178 /* The GDB JIT entry: */
1179 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1180# if 1
1181 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1182# else
1183 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1184# endif
1185
1186 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1187 RTCritSectEnter(&g_IemNativeGdbJitLock);
1188 pEhFrame->GdbJitEntry.pNext = NULL;
1189 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1190 if (__jit_debug_descriptor.pTail)
1191 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1192 else
1193 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1194 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1195 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1196
1197 /* Notify GDB: */
1198 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1199 __jit_debug_register_code();
1200 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1201 RTCritSectLeave(&g_IemNativeGdbJitLock);
1202
1203# endif /* IEMNATIVE_USE_GDB_JIT */
1204
1205 return VINF_SUCCESS;
1206}
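/* Rough layout of the abEhFrame data generated above:
 *      CIE:  length, id=0, version, "" augmentation, code/data alignment
 *            factors, return address column, initial CFA + register rules,
 *            DW_CFA_nop padding to a 4 byte boundary.
 *      FDE:  length, back-pointer to the CIE, absolute chunk start address,
 *            chunk length (so one FDE covers every TB in the chunk), padding.
 *      Terminator: zero length entry. */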
1207
1208# endif /* !RT_OS_WINDOWS */
1209#endif /* IN_RING3 */
1210
1211
1212/**
1213 * Adds another chunk to the executable memory allocator.
1214 *
1215 * This is used by the init code for the initial allocation and later by the
1216 * regular allocator function when it's out of memory.
1217 */
1218static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator)
1219{
1220 /* Check that we've room for growth. */
1221 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1222 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1223
1224 /* Allocate a chunk. */
1225#ifdef RT_OS_DARWIN
1226 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1227#else
1228 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1229#endif
1230 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1231
1232#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1233 int rc = VINF_SUCCESS;
1234#else
1235 /* Initialize the heap for the chunk. */
1236 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1237 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1238 AssertRC(rc);
1239 if (RT_SUCCESS(rc))
1240 {
1241 /*
1242 * We want the memory to be aligned on 64 bytes, so the first time thru
1243 * here we do some exploratory allocations to see how we can achieve this.
1244 * On subsequent runs we only make an initial adjustment allocation, if
1245 * necessary.
1246 *
1247 * Since we own the heap implementation, we know that the internal block
1248 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1249 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1250 * to the size, align up by 64 bytes, and subtract 32 bytes.
1251 *
1252 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1253 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1254 * allocation to force subsequent allocations to return 64 byte aligned
1255 * user areas.
1256 */
1257 if (!pExecMemAllocator->cbHeapBlockHdr)
1258 {
1259 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1260 pExecMemAllocator->cbHeapAlignTweak = 64;
1261 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1262 32 /*cbAlignment*/);
1263 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1264
1265 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1266 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1267 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1268 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1269 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1270
1271 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1272 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1273 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1274 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1275 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1276
1277 RTHeapSimpleFree(hHeap, pvTest2);
1278 RTHeapSimpleFree(hHeap, pvTest1);
1279 }
1280 else
1281 {
1282 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1283 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1284 }
1285 if (RT_SUCCESS(rc))
1286#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1287 {
1288 /*
1289 * Add the chunk.
1290 *
1291 * This must be done before the unwind init so windows can allocate
1292 * memory from the chunk when using the alternative sub-allocator.
1293 */
1294 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1295#ifdef IN_RING3
1296 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1297#endif
1298#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1299 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1300#else
1301 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1302 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1303 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1304 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1305#endif
1306
1307 pExecMemAllocator->cChunks = idxChunk + 1;
1308 pExecMemAllocator->idxChunkHint = idxChunk;
1309
1310#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1311 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1312 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1313#else
1314 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1315 pExecMemAllocator->cbTotal += cbFree;
1316 pExecMemAllocator->cbFree += cbFree;
1317#endif
1318
1319#ifdef IN_RING3
1320 /*
1321 * Initialize the unwind information (this cannot really fail atm).
1322 * (This sets pvUnwindInfo.)
1323 */
1324 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, pvChunk, idxChunk);
1325 if (RT_SUCCESS(rc))
1326#endif
1327 {
1328 return VINF_SUCCESS;
1329 }
1330
1331#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1332 /* Just in case the impossible happens, undo the above: */
1333 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1334 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1335 pExecMemAllocator->cChunks = idxChunk;
1336 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1337 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1338 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1339 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1340#endif
1341 }
1342#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1343 }
1344#endif
1345 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1346 return rc;
1347}
1348
1349
1350/**
1351 * Initializes the executable memory allocator for native recompilation on the
1352 * calling EMT.
1353 *
1354 * @returns VBox status code.
1355 * @param pVCpu The cross context virtual CPU structure of the calling
1356 * thread.
1357 * @param cbMax The max size of the allocator.
1358 * @param cbInitial The initial allocator size.
1359 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1360 * dependent).
1361 */
1362int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1363{
1364 /*
1365 * Validate input.
1366 */
1367 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1368 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1369 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1370 || cbChunk == 0
1371 || ( RT_IS_POWER_OF_TWO(cbChunk)
1372 && cbChunk >= _1M
1373 && cbChunk <= _256M
1374 && cbChunk <= cbMax),
1375 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1376 VERR_OUT_OF_RANGE);
1377
1378 /*
1379 * Adjust/figure out the chunk size.
1380 */
1381 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1382 {
1383 if (cbMax >= _256M)
1384 cbChunk = _64M;
1385 else
1386 {
1387 if (cbMax < _16M)
1388 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1389 else
1390 cbChunk = (uint32_t)cbMax / 4;
1391 if (!RT_IS_POWER_OF_TWO(cbChunk))
1392 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1393 }
1394 }
1395
1396 if (cbChunk > cbMax)
1397 cbMax = cbChunk;
1398 else
1399 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1400 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1401 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1402
1403 /*
1404 * Allocate and initialize the allocator instance.
1405 */
1406 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1407#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1408 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1409 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1410 cbNeeded += cbBitmap * cMaxChunks;
1411 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1412 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1413#endif
1414#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1415 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1416 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1417#endif
1418 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1419 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1420 VERR_NO_MEMORY);
1421 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1422 pExecMemAllocator->cbChunk = cbChunk;
1423 pExecMemAllocator->cMaxChunks = cMaxChunks;
1424 pExecMemAllocator->cChunks = 0;
1425 pExecMemAllocator->idxChunkHint = 0;
1426 pExecMemAllocator->cAllocations = 0;
1427 pExecMemAllocator->cbTotal = 0;
1428 pExecMemAllocator->cbFree = 0;
1429 pExecMemAllocator->cbAllocated = 0;
1430#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1431 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1432 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1433 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1434 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1435#endif
1436#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1437 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1438#endif
1439 for (uint32_t i = 0; i < cMaxChunks; i++)
1440 {
1441#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1442 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1443 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1444#else
1445 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1446#endif
1447 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1448#ifdef IN_RING0
1449 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1450#else
1451 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1452#endif
1453 }
1454 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1455
1456 /*
1457 * Do the initial allocations.
1458 */
1459 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1460 {
1461 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
1462 AssertLogRelRCReturn(rc, rc);
1463 }
1464
1465 pExecMemAllocator->idxChunkHint = 0;
1466
1467 return VINF_SUCCESS;
1468}
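/* Examples of the chunk size heuristic above when cbChunk is 0 on input:
 *      cbMax =   2M  ->  cbChunk =  2M (cbMax itself),  cMaxChunks = 1
 *      cbMax =  64M  ->  cbChunk = 16M (cbMax / 4),     cMaxChunks = 4
 *      cbMax = 512M  ->  cbChunk = 64M (the cap),       cMaxChunks = 8  */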
1469
1470
1471/*********************************************************************************************************************************
1472* Native Recompilation *
1473*********************************************************************************************************************************/
1474
1475
1476/**
1477 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1478 */
1479IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1480{
1481 pVCpu->iem.s.cInstructions += idxInstr;
1482 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1483}
1484
1485
1486/**
1487 * Reinitializes the native recompiler state.
1488 *
1489 * Called before starting a new recompile job.
1490 */
1491static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1492{
1493 pReNative->cLabels = 0;
1494 pReNative->cFixups = 0;
1495 pReNative->pTbOrg = pTb;
1496
1497 pReNative->bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1498#if IEMNATIVE_HST_GREG_COUNT < 32
1499 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1500#endif
1501 ;
1502 pReNative->bmHstRegsWithGstShadow = 0;
1503 pReNative->bmGstRegShadows = 0;
1504 pReNative->bmVars = 0;
1505 pReNative->u64ArgVars = UINT64_MAX;
1506
1507 /* Full host register reinit: */
1508 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->aHstRegs); i++)
1509 {
1510 pReNative->aHstRegs[i].fGstRegShadows = 0;
1511 pReNative->aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1512 pReNative->aHstRegs[i].idxVar = UINT8_MAX;
1513 }
1514
1515 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1516 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1517#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1518 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1519#endif
1520#ifdef IEMNATIVE_REG_FIXED_TMP0
1521 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1522#endif
1523 );
1524 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1525 {
1526 fRegs &= ~RT_BIT_32(idxReg);
1527 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1528 }
1529
1530 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1531#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1532 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1533#endif
1534#ifdef IEMNATIVE_REG_FIXED_TMP0
1535 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1536#endif
1537 return pReNative;
1538}
1539
1540
1541/**
1542 * Allocates and initializes the native recompiler state.
1543 *
1544 * This is called the first time an EMT wants to recompile something.
1545 *
1546 * @returns Pointer to the new recompiler state.
1547 * @param pVCpu The cross context virtual CPU structure of the calling
1548 * thread.
1549 * @param pTb The TB that's about to be recompiled.
1550 * @thread EMT(pVCpu)
1551 */
1552static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1553{
1554 VMCPU_ASSERT_EMT(pVCpu);
1555
1556 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1557 AssertReturn(pReNative, NULL);
1558
1559 /*
1560 * Try allocate all the buffers and stuff we need.
1561 */
1562 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1563 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1564 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1565 if (RT_LIKELY( pReNative->pInstrBuf
1566 && pReNative->paLabels
1567 && pReNative->paFixups))
1568 {
1569 /*
1570 * Set the buffer & array sizes on success.
1571 */
1572 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1573 pReNative->cLabelsAlloc = _8K;
1574 pReNative->cFixupsAlloc = _16K;
1575
1576 /*
1577 * Done, just need to save it and reinit it.
1578 */
1579 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1580 return iemNativeReInit(pReNative, pTb);
1581 }
1582
1583 /*
1584 * Failed. Cleanup and return.
1585 */
1586 AssertFailed();
1587 RTMemFree(pReNative->pInstrBuf);
1588 RTMemFree(pReNative->paLabels);
1589 RTMemFree(pReNative->paFixups);
1590 RTMemFree(pReNative);
1591 return NULL;
1592}
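/* How the two functions above are expected to be used together (a hedged
   sketch; the actual recompile entry point is not part of this excerpt):

       PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
       if (RT_LIKELY(pReNative))
           iemNativeReInit(pReNative, pTb);
       else
       {
           pReNative = iemNativeInit(pVCpu, pTb);
           AssertReturn(pReNative, NULL);
       }
*/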
1593
1594
1595/**
1596 * Defines a label.
1597 *
1598 * @returns Label ID.
1599 * @param pReNative The native recompile state.
1600 * @param enmType The label type.
1601 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1602 * label is not yet defined (default).
1603 * @param uData Data associated with the label. Only applicable to
1604 * certain types of labels. Default is zero.
1605 */
1606DECLHIDDEN(uint32_t) iemNativeMakeLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1607 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
1608{
1609 /*
1610 * Do we have the label already?
1611 */
1612 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1613 uint32_t const cLabels = pReNative->cLabels;
1614 for (uint32_t i = 0; i < cLabels; i++)
1615 if ( paLabels[i].enmType == enmType
1616 && paLabels[i].uData == uData)
1617 {
1618 if (paLabels[i].off == offWhere || offWhere == UINT32_MAX)
1619 return i;
1620 if (paLabels[i].off == UINT32_MAX)
1621 {
1622 paLabels[i].off = offWhere;
1623 return i;
1624 }
1625 }
1626
1627 /*
1628 * Make sure we've got room for another label.
1629 */
1630 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1631 { /* likely */ }
1632 else
1633 {
1634 uint32_t cNew = pReNative->cLabelsAlloc;
1635 AssertReturn(cNew, UINT32_MAX);
1636 AssertReturn(cLabels == cNew, UINT32_MAX);
1637 cNew *= 2;
1638 AssertReturn(cNew <= _64K, UINT32_MAX); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1639 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1640 AssertReturn(paLabels, UINT32_MAX);
1641 pReNative->paLabels = paLabels;
1642 pReNative->cLabelsAlloc = cNew;
1643 }
1644
1645 /*
1646 * Define a new label.
1647 */
1648 paLabels[cLabels].off = offWhere;
1649 paLabels[cLabels].enmType = enmType;
1650 paLabels[cLabels].uData = uData;
1651 pReNative->cLabels = cLabels + 1;
1652 return cLabels;
1653}
1654
1655
1656/**
1657 * Looks up a label.
1658 *
1659 * @returns Label ID if found, UINT32_MAX if not.
1660 */
1661static uint32_t iemNativeFindLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1662 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1663{
1664 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1665 uint32_t const cLabels = pReNative->cLabels;
1666 for (uint32_t i = 0; i < cLabels; i++)
1667 if ( paLabels[i].enmType == enmType
1668 && paLabels[i].uData == uData
1669 && ( paLabels[i].off == offWhere
1670 || offWhere == UINT32_MAX
1671 || paLabels[i].off == UINT32_MAX))
1672 return i;
1673 return UINT32_MAX;
1674}
1675
1676
1677
1678/**
1679 * Adds a fixup.
1680 *
1681 * @returns Success indicator.
1682 * @param pReNative The native recompile state.
1683 * @param offWhere The instruction offset of the fixup location.
1684 * @param idxLabel The target label ID for the fixup.
1685 * @param enmType The fixup type.
1686 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1687 */
1688DECLHIDDEN(bool) iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1689 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/) RT_NOEXCEPT
1690{
1691 Assert(idxLabel <= UINT16_MAX);
1692 Assert((unsigned)enmType <= UINT8_MAX);
1693
1694 /*
1695 * Make sure we've room.
1696 */
1697 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1698 uint32_t const cFixups = pReNative->cFixups;
1699 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1700 { /* likely */ }
1701 else
1702 {
1703 uint32_t cNew = pReNative->cFixupsAlloc;
1704 AssertReturn(cNew, false);
1705 AssertReturn(cFixups == cNew, false);
1706 cNew *= 2;
1707 AssertReturn(cNew <= _128K, false);
1708 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1709 AssertReturn(paFixups, false);
1710 pReNative->paFixups = paFixups;
1711 pReNative->cFixupsAlloc = cNew;
1712 }
1713
1714 /*
1715 * Add the fixup.
1716 */
1717 paFixups[cFixups].off = offWhere;
1718 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1719 paFixups[cFixups].enmType = enmType;
1720 paFixups[cFixups].offAddend = offAddend;
1721 pReNative->cFixups = cFixups + 1;
1722 return true;
1723}
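/* Label/fixup usage sketch (illustration only, excluded from the build): a
   forward branch first gets or creates the target label, records a fixup at
   the branch immediate, and the label offset is supplied once the target code
   is emitted.  The label and fixup types below are simply the ones already
   used by the emitters further down. */
#if 0
static uint32_t iemNativeExampleForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Get or create the (typically still undefined) target label. */
    uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);

    /* ... emit the branch instruction, then record a fixup covering its
       32-bit immediate field (the -4 addend matches the rel32 pattern below) ... */
    AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4), UINT32_MAX);

    /* Later, when the target code is emitted, define the label at the current
       offset so the fixup can be resolved: */
    iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp, off);
    return off;
}
#endif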
1724
1725/**
1726 * Slow code path for iemNativeInstrBufEnsure.
1727 */
1728DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1729 uint32_t cInstrReq) RT_NOEXCEPT
1730{
1731 /* Double the buffer size till we meet the request. */
1732 uint32_t cNew = pReNative->cInstrBufAlloc;
1733 AssertReturn(cNew > 0, NULL);
1734 do
1735 cNew *= 2;
1736 while (cNew < off + cInstrReq);
1737
1738 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1739 AssertReturn(cbNew <= _2M, NULL);
1740
1741 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1742 AssertReturn(pvNew, NULL);
1743
1744 pReNative->cInstrBufAlloc = cNew;
1745 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1746}
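/* For reference, the fast-path wrapper iemNativeInstrBufEnsure() lives in the
   header and presumably does little more than a size check before deferring to
   the slow path above.  A rough sketch (an assumption, not the actual inline):

       DECL_FORCE_INLINE(PIEMNATIVEINSTR)
       iemNativeInstrBufEnsure(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
       {
           if (RT_LIKELY(off + cInstrReq <= pReNative->cInstrBufAlloc))
               return pReNative->pInstrBuf;
           return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
       }
*/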
1747
1748
1749/**
1750 * Register parameter indexes (indexed by argument number).
1751 */
1752DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
1753{
1754 IEMNATIVE_CALL_ARG0_GREG,
1755 IEMNATIVE_CALL_ARG1_GREG,
1756 IEMNATIVE_CALL_ARG2_GREG,
1757 IEMNATIVE_CALL_ARG3_GREG,
1758#if defined(IEMNATIVE_CALL_ARG4_GREG)
1759 IEMNATIVE_CALL_ARG4_GREG,
1760# if defined(IEMNATIVE_CALL_ARG5_GREG)
1761 IEMNATIVE_CALL_ARG5_GREG,
1762# if defined(IEMNATIVE_CALL_ARG6_GREG)
1763 IEMNATIVE_CALL_ARG6_GREG,
1764# if defined(IEMNATIVE_CALL_ARG7_GREG)
1765 IEMNATIVE_CALL_ARG7_GREG,
1766# endif
1767# endif
1768# endif
1769#endif
1770};
1771
1772/**
1773 * Call register masks indexed by argument count.
1774 */
1775DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
1776{
1777 0,
1778 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
1779 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
1780 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
1781 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1782 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
1783#if defined(IEMNATIVE_CALL_ARG4_GREG)
1784 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1785 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
1786# if defined(IEMNATIVE_CALL_ARG5_GREG)
1787 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1788 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
1789# if defined(IEMNATIVE_CALL_ARG6_GREG)
1790 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1791 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
1792 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
1793# if defined(IEMNATIVE_CALL_ARG7_GREG)
1794 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1795 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
1796 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
1797# endif
1798# endif
1799# endif
1800#endif
1801};
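/* Note: g_afIemNativeCallRegs[n] is simply the OR of the first n entries of
   g_aidxIemNativeCallRegs, so the two tables must be kept in sync when adding
   or reordering argument registers. */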
1802
1803
1804DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
1805 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
1806{
1807 pReNative->bmHstRegs |= RT_BIT_32(idxReg);
1808
1809 pReNative->aHstRegs[idxReg].enmWhat = enmWhat;
1810 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
1811 pReNative->aHstRegs[idxReg].idxVar = idxVar;
1812 return (uint8_t)idxReg;
1813}
1814
1815
1816/**
1817 * Locate a register, possibly freeing one up.
1818 *
1819 * This ASSUMES the caller has done the minimal/optimal allocation checks and
1820 * failed.
1821 */
1822static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fAllowVolatile) RT_NOEXCEPT
1823{
1824 uint32_t fRegMask = fAllowVolatile
1825 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
1826 : IEMNATIVE_HST_GREG_MASK & ~(IEMNATIVE_REG_FIXED_MASK | IEMNATIVE_CALL_VOLATILE_GREG_MASK);
1827
1828 /*
1829 * Try a freed register that's shadowing a guest register
1830 */
1831 uint32_t fRegs = ~pReNative->bmHstRegs & fRegMask;
1832 if (fRegs)
1833 {
1834 /** @todo pick better here: */
1835 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
1836
1837 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
1838 Assert( (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
1839 == pReNative->aHstRegs[idxReg].fGstRegShadows);
1840 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
1841
1842 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
1843 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
1844 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
1845 return idxReg;
1846 }
1847
1848 /*
1849 * Try free up a variable that's in a register.
1850 *
1851 * We do two rounds here, first evacuating variables that don't need to be
1852 * saved on the stack, then in the second round moving things to the stack.
1853 */
1854 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
1855 {
1856 uint32_t fVars = pReNative->bmVars;
1857 while (fVars)
1858 {
1859 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
1860 uint8_t const idxReg = pReNative->aVars[idxVar].idxReg;
1861 if ( idxReg < RT_ELEMENTS(pReNative->aHstRegs)
1862 && (RT_BIT_32(idxReg) & fRegMask)
1863 && ( iLoop == 0
1864 ? pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack
1865 : pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
1866 {
1867 Assert(pReNative->bmHstRegs & RT_BIT_32(idxReg));
1868 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxReg].fGstRegShadows)
1869 == pReNative->aHstRegs[idxReg].fGstRegShadows);
1870 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
1871 == RT_BOOL(pReNative->aHstRegs[idxReg].fGstRegShadows));
1872
1873 if (pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
1874 {
1875 AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT8_MAX);
1876 uint32_t off = *poff;
1877 *poff = off = iemNativeEmitStoreGprByBp(pReNative, off,
1878 pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t)
1879 - IEMNATIVE_FP_OFF_STACK_VARS,
1880 idxReg);
1881 AssertReturn(off != UINT32_MAX, UINT8_MAX);
1882 }
1883
1884 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
1885 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
1886 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
1887 pReNative->bmHstRegs &= ~RT_BIT_32(idxReg);
1888 return idxReg;
1889 }
1890 fVars &= ~RT_BIT_32(idxVar);
1891 }
1892 }
1893
1894 AssertFailedReturn(UINT8_MAX);
1895}
1896
1897
1898/**
1899 * Moves a variable to a different register or spills it onto the stack.
1900 *
1901 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
1902 * kinds can easily be recreated if needed later.
1903 *
1904 * @returns The new code buffer position, UINT32_MAX on failure.
1905 * @param pReNative The native recompile state.
1906 * @param off The current code buffer position.
1907 * @param idxVar The variable index.
1908 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
1909 * call-volatile registers.
1910 */
1911static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
1912 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
1913{
1914 Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
1915 Assert(pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
1916
1917 uint8_t const idxRegOld = pReNative->aVars[idxVar].idxReg;
1918 Assert(idxRegOld < RT_ELEMENTS(pReNative->aHstRegs));
1919 Assert(pReNative->bmHstRegs & RT_BIT_32(idxRegOld));
1920 Assert(pReNative->aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
1921 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegOld].fGstRegShadows)
1922 == pReNative->aHstRegs[idxRegOld].fGstRegShadows);
1923 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
1924 == RT_BOOL(pReNative->aHstRegs[idxRegOld].fGstRegShadows));
1925
1926
1927 /** @todo Add statistics on this.*/
1928 /** @todo Implement basic variable liveness analysis (python) so variables
1929 * can be freed immediately once no longer used. As it stands, we are potentially
1930 * trashing registers and stack slots for dead variables. */
1931
1932 /*
1933 * First try move it to a different register, as that's cheaper.
1934 */
1935 fForbiddenRegs |= RT_BIT_32(idxRegOld);
1936 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
1937 uint32_t fRegs = ~pReNative->bmHstRegs & ~fForbiddenRegs;
1938 if (fRegs)
1939 {
1940 /* Avoid using shadow registers, if possible. */
1941 if (fRegs & ~pReNative->bmHstRegsWithGstShadow)
1942 fRegs &= ~pReNative->bmHstRegsWithGstShadow;
1943 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
1944
1945 uint64_t fGstRegShadows = pReNative->aHstRegs[idxRegOld].fGstRegShadows;
1946 pReNative->aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
1947 pReNative->aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
1948 pReNative->aHstRegs[idxRegNew].idxVar = idxVar;
1949 if (fGstRegShadows)
1950 {
1951 pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
1952 while (fGstRegShadows)
1953 {
1954 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows);
1955 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
1956
1957 Assert(pReNative->aidxGstRegShadows[idxGstReg] == idxRegOld);
1958 pReNative->aidxGstRegShadows[idxGstReg] = idxRegNew;
1959 }
1960 }
1961
1962 pReNative->aVars[idxVar].idxReg = (uint8_t)idxRegNew;
1963 pReNative->bmHstRegs |= RT_BIT_32(idxRegNew);
1964 }
1965 /*
1966 * Otherwise we must spill the register onto the stack.
1967 */
1968 else
1969 {
1970 AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT32_MAX);
1971 off = iemNativeEmitStoreGprByBp(pReNative, off,
1972 pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t) - IEMNATIVE_FP_OFF_STACK_VARS,
1973 idxRegOld);
1974 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1975
1976 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
1977 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxRegOld].fGstRegShadows;
1978 }
1979
1980 pReNative->bmHstRegs &= ~RT_BIT_32(idxRegOld);
1981 pReNative->aHstRegs[idxRegOld].fGstRegShadows = 0;
1982 return off;
1983}
1984
1985
1986/**
1987 * Allocates a temporary host general purpose register.
1988 *
1989 * This may emit code to save register content onto the stack in order to free
1990 * up a register.
1991 *
1992 * @returns The host register number, UINT8_MAX on failure.
1993 * @param pReNative The native recompile state.
1994 * @param poff Pointer to the variable with the code buffer position.
1995 * This will be update if we need to move a variable from
1996 * register to stack in order to satisfy the request.
1997 * @param fPreferVolatile Wheter to prefer volatile over non-volatile
1998 * registers (@c true, default) or the other way around
1999 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2000 */
2001DECLHIDDEN(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2002 bool fPreferVolatile /*= true*/) RT_NOEXCEPT
2003{
2004 /*
2005 * Try find a completely unused register, preferably a call-volatile one.
2006 */
2007 uint8_t idxReg;
2008 uint32_t fRegs = ~pReNative->bmHstRegs
2009 & ~pReNative->bmHstRegsWithGstShadow
2010 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2011 if (fRegs)
2012 {
2013 if (fPreferVolatile)
2014 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2015 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2016 else
2017 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2018 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2019 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2020 Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2021 }
2022 else
2023 {
2024 idxReg = iemNativeRegAllocFindFree(pReNative, poff, true /*fAllowVolatile*/);
2025 AssertReturn(idxReg != UINT8_MAX, UINT8_MAX);
2026 }
2027 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2028}
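/* Typical calling pattern (illustration only, excluded from the build): */
#if 0
static uint32_t iemNativeExampleUseTmpReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    AssertReturn(idxTmpReg != UINT8_MAX, UINT32_MAX);

    /* ... emit instructions that are free to clobber idxTmpReg ... */

    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif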
2029
2030
2031/**
2032 * Info about shadowed guest register values.
2033 * @see IEMNATIVEGSTREG
2034 */
2035static struct
2036{
2037 /** Offset in VMCPU. */
2038 uint32_t off;
2039 /** The field size. */
2040 uint8_t cb;
2041 /** Name (for logging). */
2042 const char *pszName;
2043} const g_aGstShadowInfo[] =
2044{
2045#define CPUMCTX_OFF_AND_SIZE(a_Reg) RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2046 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2047 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2048 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2049 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2050 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2051 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2052 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2053 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2054 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2055 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2056 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2057 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2058 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2059 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2060 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2061 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2062 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2063 /* [kIemNativeGstReg_Rflags] = */ { CPUMCTX_OFF_AND_SIZE(rflags), "rflags", },
2064 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
2065 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
2066 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
2067 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
2068 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
2069 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
2070 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2071 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2072 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2073 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2074 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2075 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2076 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2077 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2078 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2079 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2080 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2081 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2082 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2083 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2084 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2085 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2086 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2087 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2088#undef CPUMCTX_OFF_AND_SIZE
2089};
2090AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2091
2092
2093/** Host CPU general purpose register names. */
2094const char * const g_apszIemNativeHstRegNames[] =
2095{
2096#ifdef RT_ARCH_AMD64
2097 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2098#elif RT_ARCH_ARM64
2099 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2100 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2101#else
2102# error "port me"
2103#endif
2104};
2105
2106/**
2107 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
2108 * extending to 64-bit width.
2109 *
2110 * @returns New code buffer offset on success, UINT32_MAX on failure.
2111 * @param pReNative The native recompile state.
2112 * @param off The current code buffer position.
2113 * @param idxHstReg The host register to load the guest register value into.
2114 * @param enmGstReg The guest register to load.
2115 *
2116 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
2117 * that is something the caller needs to do if applicable.
2118 */
2119DECLHIDDEN(uint32_t) iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2120 uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
2121{
2122 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
2123 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2124
2125 switch (g_aGstShadowInfo[enmGstReg].cb)
2126 {
2127 case sizeof(uint64_t):
2128 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2129 case sizeof(uint32_t):
2130 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2131 case sizeof(uint16_t):
2132 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2133#if 0 /* not present in the table. */
2134 case sizeof(uint8_t):
2135 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2136#endif
2137 default:
2138 AssertFailedReturn(UINT32_MAX);
2139 }
2140}
2141
2142
2143#ifdef VBOX_STRICT
2144/**
2145 * Emits code that checks that the content of register @a idxReg is the same
2146 * as what's in the guest register @a enmGstReg, emitting a breakpoint
2147 * instruction if that's not the case.
2148 *
2149 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
2150 * Trashes EFLAGS on AMD64.
2151 */
2152static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2153 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
2154{
2155# ifdef RT_ARCH_AMD64
2156 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2157 AssertReturn(pbCodeBuf, UINT32_MAX);
2158
2159 /* cmp reg, [mem] */
2160 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
2161 {
2162 if (idxReg >= 8)
2163 pbCodeBuf[off++] = X86_OP_REX_R;
2164 pbCodeBuf[off++] = 0x38;
2165 }
2166 else
2167 {
2168 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
2169 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
2170 else
2171 {
2172 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
2173 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2174 else
2175 AssertReturn(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t), UINT32_MAX);
2176 if (idxReg >= 8)
2177 pbCodeBuf[off++] = X86_OP_REX_R;
2178 }
2179 pbCodeBuf[off++] = 0x39;
2180 }
2181 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
2182
2183 /* je/jz +1 */
2184 pbCodeBuf[off++] = 0x74;
2185 pbCodeBuf[off++] = 0x01;
2186
2187 /* int3 */
2188 pbCodeBuf[off++] = 0xcc;
2189
2190 /* For values smaller than the register size, we must check that the rest
2191 of the register is all zeros. */
2192 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
2193 {
2194 /* test reg64, imm32 */
2195 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2196 pbCodeBuf[off++] = 0xf7;
2197 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2198 pbCodeBuf[off++] = 0;
2199 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
2200 pbCodeBuf[off++] = 0xff;
2201 pbCodeBuf[off++] = 0xff;
2202
2203 /* je/jz +1 */
2204 pbCodeBuf[off++] = 0x74;
2205 pbCodeBuf[off++] = 0x01;
2206
2207 /* int3 */
2208 pbCodeBuf[off++] = 0xcc;
2209 }
2210 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
2211 {
2212 /* rol reg64, 32 */
2213 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2214 pbCodeBuf[off++] = 0xc1;
2215 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2216 pbCodeBuf[off++] = 32;
2217
2218 /* test reg32, ffffffffh */
2219 if (idxReg >= 8)
2220 pbCodeBuf[off++] = X86_OP_REX_B;
2221 pbCodeBuf[off++] = 0xf7;
2222 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2223 pbCodeBuf[off++] = 0xff;
2224 pbCodeBuf[off++] = 0xff;
2225 pbCodeBuf[off++] = 0xff;
2226 pbCodeBuf[off++] = 0xff;
2227
2228 /* je/jz +1 */
2229 pbCodeBuf[off++] = 0x74;
2230 pbCodeBuf[off++] = 0x01;
2231
2232 /* int3 */
2233 pbCodeBuf[off++] = 0xcc;
2234
2235 /* rol reg64, 32 */
2236 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2237 pbCodeBuf[off++] = 0xc1;
2238 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2239 pbCodeBuf[off++] = 32;
2240 }
2241
2242# elif defined(RT_ARCH_ARM64)
2243 /* mov TMP0, [gstreg] */
2244 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
2245
2246 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2247 AssertReturn(pu32CodeBuf, UINT32_MAX);
2248 /* sub tmp0, tmp0, idxReg */
2249 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
2250 /* cbz tmp0, +1 */
2251 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, IEMNATIVE_REG_FIXED_TMP0);
2252 /* brk #0x1000+enmGstReg */
2253 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
2254
2255# else
2256# error "Port me!"
2257# endif
2258 return off;
2259}
2260#endif /* VBOX_STRICT */
2261
2262
2263/**
2264 * Marks host register @a idxHstReg as containing a shadow copy of guest
2265 * register @a enmGstReg.
2266 *
2267 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2268 * host register before calling.
2269 */
2270DECL_FORCE_INLINE(void)
2271iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
2272{
2273 Assert(!(pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg)));
2274
2275 pReNative->aidxGstRegShadows[enmGstReg] = idxHstReg;
2276 pReNative->aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2277 pReNative->bmGstRegShadows |= RT_BIT_64(enmGstReg);
2278 pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2279}
2280
2281
2282/**
2283 * Clear any guest register shadow claims from @a idxHstReg.
2284 *
2285 * The register does not need to be shadowing any guest registers.
2286 */
2287DECL_FORCE_INLINE(void)
2288iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg)
2289{
2290 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2291 == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
2292 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2293 == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2294
2295 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2296 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2297 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2298}
2299
2300
2301/**
2302 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2303 * to @a idxRegTo.
2304 */
2305DECL_FORCE_INLINE(void)
2306iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo, IEMNATIVEGSTREG enmGstReg)
2307{
2308 Assert(pReNative->aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2309 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegFrom].fGstRegShadows)
2310 == pReNative->aHstRegs[idxRegFrom].fGstRegShadows);
2311 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2312 == RT_BOOL(pReNative->aHstRegs[idxRegFrom].fGstRegShadows));
2313
2314 pReNative->aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2315 pReNative->aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2316 pReNative->aidxGstRegShadows[enmGstReg] = idxRegTo;
2317}
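/* Summary of the shadow-copy bookkeeping these three helpers maintain (derived
   from the assertions above):
     - bmGstRegShadows            has one bit set per guest register with a host shadow copy.
     - aidxGstRegShadows[gstreg]  names the host register holding that shadow copy.
     - aHstRegs[hstreg].fGstRegShadows is the set of guest registers shadowed by hstreg.
     - bmHstRegsWithGstShadow     has a bit set for every host register whose
                                  fGstRegShadows is non-zero.
   Thus, for any shadowed guest register g:
       aHstRegs[aidxGstRegShadows[g]].fGstRegShadows & RT_BIT_64(g) is always set. */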
2318
2319
2320
2321/**
2322 * Intended use statement for iemNativeRegAllocTmpForGuestReg().
2323 */
2324typedef enum IEMNATIVEGSTREGUSE
2325{
2326 /** The usage is read-only, the register holding the guest register
2327 * shadow copy will not be modified by the caller. */
2328 kIemNativeGstRegUse_ReadOnly = 0,
2329 /** The caller will update the guest register (think: PC += cbInstr).
2330 * The guest shadow copy will follow the returned register. */
2331 kIemNativeGstRegUse_ForUpdate,
2332 /** The caller will use the guest register value as input in a calculation
2333 * and the host register will be modified.
2334 * This means that the returned host register will not be marked as a shadow
2335 * copy of the guest register. */
2336 kIemNativeGstRegUse_Calculation
2337} IEMNATIVEGSTREGUSE;
2338
2339/**
2340 * Allocates a temporary host general purpose register for updating a guest
2341 * register value.
2342 *
2343 * Since we may already have a register holding the guest register value,
2344 * code will be emitted to do the loading if that's not the case. Code may also
2345 * be emitted if we have to free up a register to satify the request.
2346 *
2347 * @returns The host register number, UINT8_MAX on failure.
2348 * @param pReNative The native recompile state.
2349 * @param poff Pointer to the variable with the code buffer
2350 * position. This will be update if we need to move a
2351 * variable from register to stack in order to satisfy
2352 * the request.
2353 * @param enmGstReg The guest register that will is to be updated.
2354 * @param enmIntendedUse How the caller will be using the host register.
2355 */
2356DECLHIDDEN(uint8_t) iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2357 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse) RT_NOEXCEPT
2358{
2359 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2360#ifdef LOG_ENABLED
2361 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2362#endif
2363
2364 /*
2365 * First check if the guest register value is already in a host register.
2366 */
2367 if (pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg))
2368 {
2369 uint8_t idxReg = pReNative->aidxGstRegShadows[enmGstReg];
2370 Assert(idxReg < RT_ELEMENTS(pReNative->aHstRegs));
2371 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2372 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2373
2374 if (!(pReNative->bmHstRegs & RT_BIT_32(idxReg)))
2375 {
2376 /*
2377 * If the register will trash the guest shadow copy, try find a
2378 * completely unused register we can use instead. If that fails,
2379 * we need to disassociate the host reg from the guest reg.
2380 */
2381 /** @todo would be nice to know if preserving the register is in any way helpful. */
2382 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2383 && ( ~pReNative->bmHstRegs
2384 & ~pReNative->bmHstRegsWithGstShadow
2385 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2386 {
2387 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2388 Assert(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs));
2389
2390 uint32_t off = *poff;
2391 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2392 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2393
2394 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2395 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2396 g_apszIemNativeHstRegNames[idxRegNew]));
2397 idxReg = idxRegNew;
2398 }
2399 else
2400 {
2401 pReNative->bmHstRegs |= RT_BIT_32(idxReg);
2402 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2403 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2404 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2405 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2406 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2407 else
2408 {
2409 iemNativeRegClearGstRegShadowing(pReNative, idxReg);
2410 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2411 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2412 }
2413 }
2414 }
2415 else
2416 {
2417 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2418 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2419
2420 /*
2421 * Allocate a new register, copy the value and, if updating, the
2422 * guest shadow copy assignment to the new register.
2423 */
2424 /** @todo share register for readonly access. */
2425 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2426 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
2427
2428 uint32_t off = *poff;
2429 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2430 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2431
2432 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2433 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2434 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2435 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2436 else
2437 {
2438 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg);
2439 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2440 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2441 g_apszIemNativeHstRegNames[idxRegNew]));
2442 }
2443 idxReg = idxRegNew;
2444 }
2445
2446#ifdef VBOX_STRICT
2447 /* Strict builds: Check that the value is correct. */
2448 uint32_t off = *poff;
2449 *poff = off = iemNativeEmitGuestRegValueCheck(pReNative, off, idxReg, enmGstReg);
2450 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2451#endif
2452
2453 return idxReg;
2454 }
2455
2456 /*
2457 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
2458 */
2459 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2460 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
2461
2462 uint32_t off = *poff;
2463 *poff = off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegNew, enmGstReg);
2464 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2465
2466 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2467 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg);
2468 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2469 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2470
2471 return idxRegNew;
2472}
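/* Intended calling pattern (illustration only, excluded from the build; the
   PC-advance emitter shown here is hypothetical): */
#if 0
static uint32_t iemNativeExampleAdvancePc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);

    /* ... emit 'add idxPcReg, cbInstr' and a store back to cpum.GstCtx.rip ... */
    RT_NOREF(cbInstr);

    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif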
2473
2474
2475DECLHIDDEN(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar) RT_NOEXCEPT;
2476
2477
2478/**
2479 * Allocates argument registers for a function call.
2480 *
2481 * @returns New code buffer offset on success, UINT32_MAX on failure.
2482 * @param pReNative The native recompile state.
2483 * @param off The current code buffer offset.
2484 * @param cArgs The number of arguments the function call takes.
2485 */
2486DECLHIDDEN(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs) RT_NOEXCEPT
2487{
2488 AssertReturn(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT, false);
2489 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2490 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2491
2492 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2493 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2494 else if (cArgs == 0)
2495 return true;
2496
2497 /*
2498 * Do we get lucky and all registers are free and not shadowing anything?
2499 */
2500 if (((pReNative->bmHstRegs | pReNative->bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2501 for (uint32_t i = 0; i < cArgs; i++)
2502 {
2503 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2504 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2505 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2506 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2507 }
2508 /*
2509 * Okay, not lucky so we have to free up the registers.
2510 */
2511 else
2512 for (uint32_t i = 0; i < cArgs; i++)
2513 {
2514 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2515 if (pReNative->bmHstRegs & RT_BIT_32(idxReg))
2516 {
2517 switch (pReNative->aHstRegs[idxReg].enmWhat)
2518 {
2519 case kIemNativeWhat_Var:
2520 {
2521 uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
2522 AssertReturn(idxVar < RT_ELEMENTS(pReNative->aVars), false);
2523 Assert(pReNative->aVars[idxVar].idxReg == idxReg);
2524 Assert(pReNative->bmVars & RT_BIT_32(idxVar));
2525
2526 if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2527 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2528 else
2529 {
2530 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2531 AssertReturn(off != UINT32_MAX, false);
2532 Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2533 }
2534 break;
2535 }
2536
2537 case kIemNativeWhat_Tmp:
2538 case kIemNativeWhat_Arg:
2539 case kIemNativeWhat_rc:
2540 AssertFailedReturn(false);
2541 default:
2542 AssertFailedReturn(false);
2543 }
2544
2545 }
2546 if (pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2547 {
2548 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2549 Assert( (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
2550 == pReNative->aHstRegs[idxReg].fGstRegShadows);
2551 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2552 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2553 }
2554 else
2555 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2556 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2557 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2558 }
2559 pReNative->bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2560 return true;
2561}
2562
2563
2564DECLHIDDEN(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
2565
2566
2567#if 0
2568/**
2569 * Frees a register assignment of any type.
2570 *
2571 * @param pReNative The native recompile state.
2572 * @param idxHstReg The register to free.
2573 *
2574 * @note Does not update variables.
2575 */
2576DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2577{
2578 Assert(idxHstReg < RT_ELEMENTS(pReNative->aHstRegs));
2579 Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
2580 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2581 Assert( pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2582 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2583 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2584 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2585 Assert( pReNative->aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2586 || pReNative->aVars[pReNative->aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2587 || (pReNative->bmVars & RT_BIT_32(pReNative->aHstRegs[idxHstReg].idxVar)));
2588 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2589 == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
2590 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2591 == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2592
2593 pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
2594 /* no flushing, right:
2595 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2596 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2597 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2598 */
2599}
2600#endif
2601
2602
2603/**
2604 * Frees a temporary register.
2605 *
2606 * Any shadow copies of guest registers assigned to the host register will not
2607 * be flushed by this operation.
2608 */
2609DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2610{
2611 Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
2612 Assert(pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2613 pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
2614 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2615 g_apszIemNativeHstRegNames[idxHstReg], pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2616}
2617
2618
2619/**
2620 * Called right before emitting a call instruction to move anything important
2621 * out of call-volatile registers, free and flush the call-volatile registers,
2622 * optionally freeing argument variables.
2623 *
2624 * @returns New code buffer offset, UINT32_MAX on failure.
2625 * @param pReNative The native recompile state.
2626 * @param off The code buffer offset.
2627 * @param cArgs The number of arguments the function call takes.
2628 * It is presumed that the host register part of these have
2629 * been allocated as such already and won't need moving,
2630 * just freeing.
2631 * @param fFreeArgVars Whether to free argument variables for the call.
2632 */
2633DECLHIDDEN(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2634 uint8_t cArgs, bool fFreeArgVars) RT_NOEXCEPT
2635{
2636 /*
2637 * Free argument variables first (simplified).
2638 */
2639 AssertReturn(cArgs <= RT_ELEMENTS(pReNative->aidxArgVars), UINT32_MAX);
2640 if (fFreeArgVars && cArgs > 0)
2641 {
2642 for (uint32_t i = 0; i < cArgs; i++)
2643 {
2644 uint8_t idxVar = pReNative->aidxArgVars[i];
2645 if (idxVar < RT_ELEMENTS(pReNative->aVars))
2646 {
2647 pReNative->aidxArgVars[i] = UINT8_MAX;
2648 pReNative->bmVars &= ~RT_BIT_32(idxVar);
2649 Assert( pReNative->aVars[idxVar].idxReg
2650 == (i < RT_ELEMENTS(g_aidxIemNativeCallRegs) ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
2651 }
2652 }
2653 Assert(pReNative->u64ArgVars == UINT64_MAX);
2654 }
2655
2656 /*
2657 * Move anything important out of volatile registers.
2658 */
2659 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2660 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2661 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2662#ifdef IEMNATIVE_REG_FIXED_TMP0
2663 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2664#endif
2665 & ~g_afIemNativeCallRegs[cArgs];
2666
2667 fRegsToMove &= pReNative->bmHstRegs;
2668 if (!fRegsToMove)
2669 { /* likely */ }
2670 else
2671 while (fRegsToMove != 0)
2672 {
2673 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2674 fRegsToMove &= ~RT_BIT_32(idxReg);
2675
2676 switch (pReNative->aHstRegs[idxReg].enmWhat)
2677 {
2678 case kIemNativeWhat_Var:
2679 {
2680 uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
2681 Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
2682 Assert(pReNative->bmVars & RT_BIT_32(idxVar));
2683 Assert(pReNative->aVars[idxVar].idxReg == idxReg);
2684 if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2685 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2686 else
2687 {
2688 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2689 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2690 }
2691 continue;
2692 }
2693
2694 case kIemNativeWhat_Arg:
2695 AssertMsgFailed(("What?!?: %u\n", idxReg));
2696 continue;
2697
2698 case kIemNativeWhat_rc:
2699 case kIemNativeWhat_Tmp:
2700 AssertMsgFailed(("Missing free: %u\n", idxReg));
2701 continue;
2702
2703 case kIemNativeWhat_FixedTmp:
2704 case kIemNativeWhat_pVCpuFixed:
2705 case kIemNativeWhat_pCtxFixed:
2706 case kIemNativeWhat_FixedReserved:
2707 case kIemNativeWhat_Invalid:
2708 case kIemNativeWhat_End:
2709 AssertFailedReturn(UINT32_MAX);
2710 }
2711 AssertFailedReturn(UINT32_MAX);
2712 }
2713
2714 /*
2715 * Do the actual freeing.
2716 */
2717 pReNative->bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2718
2719 /* If there are guest register shadows in any call-volatile register, we
2720 have to clear the corresponding guest register masks for each register. */
2721 uint32_t fHstRegsWithGstShadow = pReNative->bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2722 if (fHstRegsWithGstShadow)
2723 {
2724 pReNative->bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
2725 do
2726 {
2727 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
2728 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2729
2730 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2731 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2732 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2733 } while (fHstRegsWithGstShadow != 0);
2734 }
2735
2736 return off;
2737}
2738
2739
2740/**
2741 * Flushes a set of guest register shadow copies.
2742 *
2743 * This is usually done after calling a threaded function or a C-implementation
2744 * of an instruction.
2745 *
2746 * @param pReNative The native recompile state.
2747 * @param fGstRegs Set of guest registers to flush.
2748 */
2749DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
2750{
2751 /*
2752 * Reduce the mask by what's currently shadowed
2753 */
2754 fGstRegs &= pReNative->bmGstRegShadows;
2755 if (fGstRegs)
2756 {
2757 pReNative->bmGstRegShadows &= ~fGstRegs;
2758 if (pReNative->bmGstRegShadows)
2759 {
2760 /*
2761 * Partial.
2762 */
2763 do
2764 {
2765 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2766 uint8_t const idxHstReg = pReNative->aidxGstRegShadows[idxGstReg];
2767 Assert(idxHstReg < RT_ELEMENTS(pReNative->aidxGstRegShadows));
2768 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2769 Assert(pReNative->aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
2770
2771 uint64_t const fInThisHstReg = (pReNative->aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
2772 fGstRegs &= ~fInThisHstReg;
2773 pReNative->aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
2774 if (!pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2775 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2776 } while (fGstRegs != 0);
2777 }
2778 else
2779 {
2780 /*
2781 * Clear all.
2782 */
2783 do
2784 {
2785 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2786 uint8_t const idxHstReg = pReNative->aidxGstRegShadows[idxGstReg];
2787 Assert(idxHstReg < RT_ELEMENTS(pReNative->aidxGstRegShadows));
2788 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2789 Assert(pReNative->aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
2790
2791 fGstRegs &= ~(pReNative->aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
2792 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2793 } while (fGstRegs != 0);
2794 pReNative->bmHstRegsWithGstShadow = 0;
2795 }
2796 }
2797}
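/* Usage sketch: after emitting a call to a helper that may modify guest state,
   drop the affected shadow copies so stale host values are not reused, e.g.
   (illustrative register choice):
       iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_Rflags));
   or pass UINT64_MAX to drop everything, as the threaded-call emitter below does. */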
2798
2799
2800/**
2801 * Emits code for checking the return code of a call and rcPassUp, returning
2802 * from the code if either are non-zero.
2803 */
2804DECLHIDDEN(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2805 uint8_t idxInstr) RT_NOEXCEPT
2806{
2807#ifdef RT_ARCH_AMD64
2808 /*
2809 * AMD64: eax = call status code.
2810 */
2811
2812 /* edx = rcPassUp */
2813 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
2814 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2815
2816 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
2817 AssertReturn(pbCodeBuf, UINT32_MAX);
2818
2819 /* edx = eax | rcPassUp */
2820 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
2821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
2822
2823 /* Jump to non-zero status return path, loading cl with the instruction number. */
2824 pbCodeBuf[off++] = 0xb0 + X86_GREG_xCX; /* mov cl, imm8 (pCallEntry->idxInstr) */
2825 pbCodeBuf[off++] = idxInstr;
2826
2827 pbCodeBuf[off++] = 0x0f; /* jnz rel32 */
2828 pbCodeBuf[off++] = 0x85;
2829 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
2830 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
2831 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4), UINT32_MAX);
2832 pbCodeBuf[off++] = 0x00;
2833 pbCodeBuf[off++] = 0x00;
2834 pbCodeBuf[off++] = 0x00;
2835 pbCodeBuf[off++] = 0x00;
2836
2837 /* done. */
2838
2839#elif RT_ARCH_ARM64
2840 /*
2841 * ARM64: w0 = call status code.
2842 */
2843 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
2844 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
2845
2846 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2847 AssertReturn(pu32CodeBuf, UINT32_MAX);
2848
2849 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
2850
2851 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
2852 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
2853 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5), UINT32_MAX);
2854 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0 /*imm19, set by the fixup*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
2855
2856#else
2857# error "port me"
2858#endif
2859 return off;
2860}
2861
2862
2863/**
2864 * Emits a call to a CImpl function or something similar.
2865 */
2866static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2867 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
2868 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
2869{
2870#ifdef VBOX_STRICT
2871 off = iemNativeEmitMarker(pReNative, off);
2872 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2873#endif
2874
2875 /*
2876 * Load the parameters.
2877 */
2878#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
2879 /* Special case: the hidden VBOXSTRICTRC return pointer. */
2880 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2881 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
2882 if (cAddParams > 0)
2883 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
2884 if (cAddParams > 1)
2885 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
2886 if (cAddParams > 2)
2887 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
2888 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2889
2890#else
2891 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
2892 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2893 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
2894 if (cAddParams > 0)
2895 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
2896 if (cAddParams > 1)
2897 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
2898 if (cAddParams > 2)
2899# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
2900 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
2901# else
2902 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
2903# endif
2904#endif
2905 AssertReturn(off != UINT32_MAX, off);
2906
2907 /*
2908 * Make the call.
2909 */
2910#ifdef RT_ARCH_AMD64
2911 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, pfnCImpl);
2912
2913 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2914 AssertReturn(pbCodeBuf, UINT32_MAX);
2915 pbCodeBuf[off++] = 0xff; /* call rax */
2916 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
2917
2918# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
2919 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2920# endif
2921
2922#elif defined(RT_ARCH_ARM64)
2923 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, pfnCImpl);
2924
2925 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2926 AssertReturn(pu32CodeBuf, UINT32_MAX);
2927 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
2928
2929#else
2930# error "Port me!"
2931#endif
2932
2933 /*
2934 * Check the status code.
2935 */
2936 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2937}
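/*
 * For orientation only, a sketch of what the sequence emitted above boils
 * down to on AMD64/SysV, assuming the usual IEMNATIVE_CALL_ARGn_GREG mapping
 * to rdi/rsi/rdx/rcx/r8 (the Windows + VBOXSTRICTRC variant instead passes a
 * hidden return-value pointer in rcx and spills the extra parameters to the
 * stack frame):
 *
 *      mov     rdi, rbx            ; arg0 = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU)
 *      mov     rsi, cbInstr        ; arg1
 *      mov     rdx, uParam0        ; only when cAddParams > 0
 *      mov     rcx, uParam1        ; only when cAddParams > 1
 *      mov     r8,  uParam2        ; only when cAddParams > 2
 *      mov     rax, pfnCImpl
 *      call    rax
 *
 * iemNativeEmitCheckCallRetAndPassUp() then appends the status-code check
 * that branches to the NonZeroRetOrPassUp label.
 */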
2938
2939
2940/**
2941 * Emits a call to a threaded worker function.
2942 */
2943static int32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
2944{
2945#ifdef VBOX_STRICT
2946 off = iemNativeEmitMarker(pReNative, off);
2947 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2948#endif
2949 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
2950 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
2951 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
2952
2953#ifdef RT_ARCH_AMD64
2954 /* Load the parameters and emit the call. */
2955# ifdef RT_OS_WINDOWS
2956# ifndef VBOXSTRICTRC_STRICT_ENABLED
2957 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
2958 if (cParams > 0)
2959 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
2960 if (cParams > 1)
2961 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
2962 if (cParams > 2)
2963 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
2964# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
2965 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
2966 if (cParams > 0)
2967 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
2968 if (cParams > 1)
2969 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
2970 if (cParams > 2)
2971 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
2972 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
2973 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2974# endif /* VBOXSTRICTRC_STRICT_ENABLED */
2975# else
2976 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
2977 if (cParams > 0)
2978 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
2979 if (cParams > 1)
2980 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
2981 if (cParams > 2)
2982 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
2983# endif
2984 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
2985
2986 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2987 AssertReturn(pbCodeBuf, UINT32_MAX);
2988 pbCodeBuf[off++] = 0xff; /* call rax */
2989 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
2990
2991# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
2992 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2993# endif
2994
2995#elif defined(RT_ARCH_ARM64)
2996 /*
2997 * ARM64:
2998 */
2999 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3000 if (cParams > 0)
3001 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3002 if (cParams > 1)
3003 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3004 if (cParams > 2)
3005 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3006 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0,
3007 (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3008
3009 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3010 AssertReturn(pu32CodeBuf, UINT32_MAX);
3011
3012 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
3013
3014#else
3015# error "port me"
3016#endif
3017
3018 /*
3019 * Check the status code.
3020 */
3021 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3022 AssertReturn(off != UINT32_MAX, off);
3023
3024 return off;
3025}
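/*
 * The argument setup above implies a threaded worker signature roughly along
 * these lines (a sketch only; the real typedef lives in the IEM headers and
 * is not reproduced here):
 *
 *      VBOXSTRICTRC iemThreadedFunc_Xxx(PVMCPUCC pVCpu, uint64_t uParam0,
 *                                       uint64_t uParam1, uint64_t uParam2);
 *
 * g_apfnIemThreadedFunctions[] supplies the worker address for the indirect
 * call and g_acIemThreadedFunctionUsedArgs[] says how many of the three
 * parameters actually need to be loaded.
 */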
3026
3027
3028/**
3029 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3030 */
3031static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3032{
3033 /*
3034 * Generate the rc + rcPassUp fiddling code if needed.
3035 */
3036 uint32_t idxLabel = iemNativeFindLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3037 if (idxLabel != UINT32_MAX)
3038 {
3039 Assert(pReNative->paLabels[idxLabel].off == UINT32_MAX);
3040 pReNative->paLabels[idxLabel].off = off;
3041
3042 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3043#ifdef RT_ARCH_AMD64
3044 /*
3045 * AMD64:
3046 */
3047 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3048 AssertReturn(pbCodeBuf, UINT32_MAX);
3049
3050 /* Call helper and jump to return point. */
3051# ifdef RT_OS_WINDOWS
3052 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3053 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3054 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3055 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3056 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3057 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3058# else
3059 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3060 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3061 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3062 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3063 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3064 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3065# endif
3066 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3067 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3068
3069 pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3070 AssertReturn(pbCodeBuf, UINT32_MAX);
3071 pbCodeBuf[off++] = 0xff; /* call rax */
3072 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
3073
3074 /* Jump to common return point. */
3075 uint32_t offRel = pReNative->paLabels[idxReturnLabel].off - (off + 2);
3076 if (-(int32_t)offRel <= 127)
3077 {
3078 pbCodeBuf[off++] = 0xeb; /* jmp rel8 */
3079 pbCodeBuf[off++] = (uint8_t)offRel;
3080 off++; /* note: this byte is skipped, i.e. left unwritten; it sits between the jmp and the int3 below and is never executed */
3081 }
3082 else
3083 {
3084 offRel -= 3;
3085 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */
3086 pbCodeBuf[off++] = RT_BYTE1(offRel);
3087 pbCodeBuf[off++] = RT_BYTE2(offRel);
3088 pbCodeBuf[off++] = RT_BYTE3(offRel);
3089 pbCodeBuf[off++] = RT_BYTE4(offRel);
3090 }
3091 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3092
3093#elif defined(RT_ARCH_ARM64)
3094 /*
3095 * ARM64:
3096 */
3097 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3098 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3099 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3100 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3101 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3102 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3103 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3104
3105 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3106 AssertReturn(pu32CodeBuf, UINT32_MAX);
3107 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
3108
3109 /* Jump back to the common return point. */
3110 int32_t const offRel = pReNative->paLabels[idxReturnLabel].off - off;
3111 pu32CodeBuf[off++] = Armv8A64MkInstrB(offRel);
3112#else
3113# error "port me"
3114#endif
3115 }
3116 return off;
3117}
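/*
 * Note on the Windows/AMD64 register shuffle above: the instruction number
 * that the caller left in ecx (see the 'cl = instruction number' remark) must
 * be copied to r8 before rcx is overwritten with pVCpu, and the return code
 * in eax must be moved to edx before rax is reloaded with the address of
 * iemNativeHlpExecStatusCodeFiddling().
 */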
3118
3119
3120/**
3121 * Emits a standard epilog.
3122 */
3123static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3124{
3125 /*
3126 * Successful return, so clear the return register (eax, w0).
3127 */
3128 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3129 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3130
3131 /*
3132 * Define label for common return point.
3133 */
3134 uint32_t const idxReturn = iemNativeMakeLabel(pReNative, kIemNativeLabelType_Return, off);
3135 AssertReturn(idxReturn != UINT32_MAX, UINT32_MAX);
3136
3137 /*
3138 * Restore registers and return.
3139 */
3140#ifdef RT_ARCH_AMD64
3141 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3142 AssertReturn(pbCodeBuf, UINT32_MAX);
3143
3144 /* Reposition esp at the r15 restore point. */
3145 pbCodeBuf[off++] = X86_OP_REX_W;
3146 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3147 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3148 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3149
3150 /* Pop non-volatile registers and return */
3151 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3152 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3153 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3154 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3155 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3156 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3157 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3158 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3159# ifdef RT_OS_WINDOWS
3160 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3161 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3162# endif
3163 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3164 pbCodeBuf[off++] = 0xc9; /* leave */
3165 pbCodeBuf[off++] = 0xc3; /* ret */
3166 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3167
3168#elif defined(RT_ARCH_ARM64)
3169 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3170 AssertReturn(pu32CodeBuf, UINT32_MAX);
3171
3172 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3173 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3174 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
3175 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3176 IEMNATIVE_FRAME_VAR_SIZE / 8);
3177 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3178 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3179 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3180 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3181 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3182 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3183 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3184 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3185 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3186 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3187 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3188 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3189
3190 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3191 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3192 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3193 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3194
3195 /* retab / ret */
3196# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3197 if (1)
3198 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3199 else
3200# endif
3201 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3202
3203#else
3204# error "port me"
3205#endif
3206
3207 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3208}
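/*
 * Note the ordering above: the return register is zeroed (VINF_SUCCESS)
 * before the Return label is defined, so the straight-line path returns 0
 * while the RC-fiddling path, which jumps to the label from
 * iemNativeEmitRcFiddling(), keeps whatever status the fiddling helper
 * produced.
 */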
3209
3210
3211/**
3212 * Emits a standard prolog.
3213 */
3214static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3215{
3216#ifdef RT_ARCH_AMD64
3217 /*
3218 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3219 * reserving 64 bytes for stack variables plus 4 non-register argument
3220 * slots. Fixed register assignment: xBX = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU);
3221 *
3222 * Since we always do the same register spilling, we can use the same
3223 * unwind description for all the code.
3224 */
3225 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3226 AssertReturn(pbCodeBuf, UINT32_MAX);
3227 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3228 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3229 pbCodeBuf[off++] = 0x8b;
3230 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3231 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3232 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3233# ifdef RT_OS_WINDOWS
3234 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3235 pbCodeBuf[off++] = 0x8b;
3236 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3237 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3238 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3239# else
3240 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3241 pbCodeBuf[off++] = 0x8b;
3242 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3243# endif
3244 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3245 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3246 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3247 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3248 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3249 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3250 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3251 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3252
3253 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3254 X86_GREG_xSP,
3255 IEMNATIVE_FRAME_ALIGN_SIZE
3256 + IEMNATIVE_FRAME_VAR_SIZE
3257 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3258 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3259 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3260 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3261 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3262
3263#elif defined(RT_ARCH_ARM64)
3264 /*
3265 * We set up a stack frame exactly like on x86, only we have to push the
3266 * return address ourselves here. We save all non-volatile registers.
3267 */
3268 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3269 AssertReturn(pu32CodeBuf, UINT32_MAX);
3270
3271# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been
3272 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It is
3273 * definitely the dwarf stepping code, but until that is found it is very tedious to figure out whether it is
3274 * in any way conditional, so just emit this instruction now and hope for the best... */
3275 /* pacibsp */
3276 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3277# endif
3278
3279 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
3280 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3281 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
3282 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3283 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3284 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3285 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3286 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3287 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3288 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3289 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3290 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3291 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3292 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3293 /* Save the BP and LR (ret address) registers at the top of the frame. */
3294 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3295 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3296 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3297 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3298 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3299 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3300
3301 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3302 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3303
3304 /* mov r28, r0 */
3305 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3306 /* mov r27, r1 */
3307 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3308
3309#else
3310# error "port me"
3311#endif
3312 return off;
3313}
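/*
 * Rough AMD64 frame shape implied by the prolog above (SysV flavour; Windows
 * additionally saves rsi/rdi and reserves shadow argument space).  The exact
 * offsets come from the IEMNATIVE_FRAME_* / IEMNATIVE_FP_OFF_* constants
 * defined in the header and are not repeated here:
 *
 *      [rbp+08]   return address
 *      [rbp+00]   saved rbp
 *      [rbp-08]   saved rbx  (rbx = pVCpu for the whole TB)
 *      ...        saved r12 thru r15
 *      ...        variable area, stack argument slots, alignment padding
 *      [rsp]      bottom of the frame
 *
 * The ARM64 prolog mirrors this: a 12-register save area (x19-x28, BP, LR)
 * with BP pointing at the saved BP/LR pair and the variable area allocated
 * below it.
 */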
3314
3315
3316DECLINLINE(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3317 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3318{
3319 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3320}
3321
3322
3323DECLINLINE(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3324 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3325{
3326 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3327}
3328
3329
3330DECLINLINE(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3331 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1, uint64_t uArg2)
3332{
3333 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3334}
3335
3336
3337#if 0
3338/** Same as iemRegFinishClearingRF. */
3339DECLINLINE(uint32_t) iemNativeEmitFinishClearingRF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3340{
3341 RT_NOREF(pReNative, off);
3342#if 0
3343 uint32_t const fFlags = pReNative->pTbOrg->fFlags;
3344 if (fFlags & IEMTB_F_INHIBIT_SHADOW)
3345 {
3346 }
3347 IEMTB_F_IEM_F_MASK
3348
3349 //
3350 if (RT_LIKELY(!( pVCpu->cpum.GstCtx.eflags.uBoth
3351 & (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)) ))
3352 return VINF_SUCCESS;
3353 return iemFinishInstructionWithFlagsSet(pVCpu);
3354#else
3355 return UINT32_MAX;
3356#endif
3357}
3358#endif
3359
3360
3361/** Same as iemRegAddToEip32AndFinishingNoFlags. */
3362DECLINLINE(uint32_t) iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3363{
3364 /* Allocate a temporary PC register. */
3365 /** @todo this is not strictly required on AMD64, we could emit alternative
3366 * code here if we don't get a tmp register... */
3367 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3368 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3369
3370 /* Perform the addition and store the result. */
3371 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3372 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3373
3374 /* Free but don't flush the PC register. */
3375 iemNativeRegFreeTmp(pReNative, idxPcReg);
3376
3377 return off;
3378}
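/*
 * A sketch of what the helper above produces, assuming the allocator hands
 * back a host register (shown here as eax) shadowing the guest PC:
 *
 *      add     eax, cbInstr        ; 32-bit add, i.e. EIP wrap-around semantics
 *      mov     [rbx + offsetof(VMCPU, cpum.GstCtx.rip)], rax
 *
 * The register is freed but not flushed afterwards, so the shadow copy stays
 * valid for subsequent instructions in the same TB.
 */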
3379
3380/*
3381 * MC definitions for the native recompiler.
3382 */
3383
3384#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3385 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3386
3387#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3388 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3389
3390#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3391 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3392
3393#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3394 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3395
3396
3397#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3398 {
3399
3400#define IEM_MC_END() \
3401 } AssertFailedReturn(UINT32_MAX /* shouldn't be reached! */)
3402
3403#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
3404 return iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, a_cbInstr)
3405
3406#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
3407 return iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, a_cbInstr)
3408
3409#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
3410 return iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, a_cbInstr)
3411
3412
3413/*
3414 * Builtin functions.
3415 */
3416
3417/**
3418 * Built-in function that calls a C-implementation function taking zero arguments.
3419 */
3420static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
3421{
3422 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
3423 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
3424 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
3425}
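/*
 * Parameter packing assumed by the builtin above: the threaded call entry
 * carries the C-impl function pointer in auParams[0] and the instruction
 * length in auParams[1]; no additional arguments are forwarded (cAddParams
 * is 0).
 */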
3426
3427
3428
3429/*
3430 * Include g_apfnIemNativeRecompileFunctions and associated functions.
3431 *
3432 * This should probably live in its own file later, but let's see what the
3433 * compile times turn out to be first.
3434 */
3435#include "IEMNativeFunctions.cpp.h"
3436
3437
3438/**
3439 * Recompiles the given threaded TB into a native one.
3440 *
3441 * In case of failure the translation block will be returned as-is.
3442 *
3443 * @returns pTb.
3444 * @param pVCpu The cross context virtual CPU structure of the calling
3445 * thread.
3446 * @param pTb The threaded translation block to recompile to native.
3447 */
3448PIEMTB iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
3449{
3450 /*
3451 * The first time through, we allocate the recompiler state; the other times
3452 * we just need to reset it before using it again.
3453 */
3454 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
3455 if (RT_LIKELY(pReNative))
3456 iemNativeReInit(pReNative, pTb);
3457 else
3458 {
3459 pReNative = iemNativeInit(pVCpu, pTb);
3460 AssertReturn(pReNative, pTb);
3461 }
3462
3463 /*
3464 * Emit prolog code (fixed).
3465 */
3466 uint32_t off = iemNativeEmitProlog(pReNative, 0);
3467 AssertReturn(off != UINT32_MAX, pTb);
3468
3469 /*
3470 * Convert the calls to native code.
3471 */
3472 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3473 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3474 while (cCallsLeft-- > 0)
3475 {
3476 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
3477 if (pfnRecom) /** @todo stats on this. */
3478 {
3479 //STAM_COUNTER_INC()
3480 off = pfnRecom(pReNative, off, pCallEntry);
3481 }
3482 else
3483 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
3484 AssertReturn(off != UINT32_MAX, pTb);
3485
3486 pCallEntry++;
3487 }
3488
3489 /*
3490 * Emit the epilog code.
3491 */
3492 off = iemNativeEmitEpilog(pReNative, off);
3493 AssertReturn(off != UINT32_MAX, pTb);
3494
3495 /*
3496 * Make sure all labels have been defined.
3497 */
3498 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
3499#ifdef VBOX_STRICT
3500 uint32_t const cLabels = pReNative->cLabels;
3501 for (uint32_t i = 0; i < cLabels; i++)
3502 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
3503#endif
3504
3505 /*
3506 * Allocate executable memory, copy over the code we've generated.
3507 */
3508 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3509 if (pTbAllocator->pDelayedFreeHead)
3510 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
3511
3512 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
3513 AssertReturn(paFinalInstrBuf, pTb);
3514 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
3515
3516 /*
3517 * Apply fixups.
3518 */
3519 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
3520 uint32_t const cFixups = pReNative->cFixups;
3521 for (uint32_t i = 0; i < cFixups; i++)
3522 {
3523 Assert(paFixups[i].off < off);
3524 Assert(paFixups[i].idxLabel < cLabels);
3525 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
3526 switch (paFixups[i].enmType)
3527 {
3528#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3529 case kIemNativeFixupType_Rel32:
3530 Assert(paFixups[i].off + 4 <= off);
3531 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
3532 continue;
3533
3534#elif defined(RT_ARCH_ARM64)
3535 case kIemNativeFixupType_RelImm19At5:
3536 {
3537 Assert(paFixups[i].off < off);
3538 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
3539 Assert(offDisp >= -262144 && offDisp < 262144);
3540 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp << 5) & UINT32_C(0x00ffffe0)); /* mask so a negative displacement cannot clobber bits 31:24 */
3541 continue;
3542 }
3543#endif
3544 case kIemNativeFixupType_Invalid:
3545 case kIemNativeFixupType_End:
3546 break;
3547 }
3548 AssertFailed();
3549 }
3550
3551 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
3552#ifdef LOG_ENABLED
3553 if (LogIs3Enabled())
3554 {
3555
3556 }
3557#endif
3558
3559 /*
3560 * Convert the translation block to the native type, freeing the threaded call table.
3561 */
3562 //RT_BREAKPOINT();
3563 RTMemFree(pTb->Thrd.paCalls);
3564 pTb->Native.paInstructions = paFinalInstrBuf;
3565 pTb->Native.cInstructions = off;
3566 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
3567
3568 Assert(pTbAllocator->cThreadedTbs > 0);
3569 pTbAllocator->cThreadedTbs -= 1;
3570 pTbAllocator->cNativeTbs += 1;
3571 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
3572
3573 return pTb;
3574}
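#if 0 /* Illustrative only: a minimal sketch of how a caller might feed a hot threaded TB
       * to iemNativeRecompile().  The surrounding lookup/hotness logic is assumed, not
       * taken from this file, and IEMTB_F_TYPE_THREADED is inferred from the type mask
       * handling above. */
static PIEMTB iemExampleMaybeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
{
    /* Only threaded TBs are candidates for native recompilation. */
    if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED)
    {
        pTb = iemNativeRecompile(pVCpu, pTb);
        /* On failure the TB is returned unchanged, so check the type flag rather than
           the pointer to see whether native code is now available. */
        if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
        {
            /* pTb->Native.paInstructions / cInstructions now describe the generated code. */
        }
    }
    return pTb;
}
#endif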
3575