VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp@104939

Last change on this file since 104939 was 104876, checked in by vboxsync, 8 months ago

VMM/IEM: Optimize executable memory allocation on macOS by removing the need for calling RTMemProtect() to switch between RW and RX memory. [build fixes] bugref:10555

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 63.4 KB
1/* $Id: IEMAllN8veExecMem.cpp 104876 2024-06-10 13:50:35Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/mem.h>
59#include <iprt/string.h>
60#if defined(RT_ARCH_AMD64)
61# include <iprt/x86.h>
62#elif defined(RT_ARCH_ARM64)
63# include <iprt/armv8.h>
64#endif
65
66#ifdef RT_OS_WINDOWS
67# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
68extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
70#else
71# include <iprt/formats/dwarf.h>
72# if defined(RT_OS_DARWIN)
73# include <libkern/OSCacheControl.h>
74# include <mach/mach.h>
75# include <mach/mach_vm.h>
76# define IEMNATIVE_USE_LIBUNWIND
77extern "C" void __register_frame(const void *pvFde);
78extern "C" void __deregister_frame(const void *pvFde);
79# else
80# ifdef DEBUG_bird /** @todo not thread safe yet */
81# define IEMNATIVE_USE_GDB_JIT
82# endif
83# ifdef IEMNATIVE_USE_GDB_JIT
84# include <iprt/critsect.h>
85# include <iprt/once.h>
86# include <iprt/formats/elf64.h>
87# endif
88extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
89extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
90# endif
91#endif
92
93#include "IEMN8veRecompiler.h"
94
95
96/*********************************************************************************************************************************
97* Executable Memory Allocator *
98*********************************************************************************************************************************/
99/** The chunk sub-allocation unit size in bytes. */
100#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 256
101/** The chunk sub-allocation unit size as a shift factor. */
102#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 8
103/** Enables adding a header to the sub-allocator allocations.
104 * This is useful for freeing up executable memory among other things. */
105#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
106/** Use alternative pruning. */
107#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
108
109
110#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
111# ifdef IEMNATIVE_USE_GDB_JIT
112# define IEMNATIVE_USE_GDB_JIT_ET_DYN
113
114/** GDB JIT: Code entry. */
115typedef struct GDBJITCODEENTRY
116{
117 struct GDBJITCODEENTRY *pNext;
118 struct GDBJITCODEENTRY *pPrev;
119 uint8_t *pbSymFile;
120 uint64_t cbSymFile;
121} GDBJITCODEENTRY;
122
123/** GDB JIT: Actions. */
124typedef enum GDBJITACTIONS : uint32_t
125{
126 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
127} GDBJITACTIONS;
128
129/** GDB JIT: Descriptor. */
130typedef struct GDBJITDESCRIPTOR
131{
132 uint32_t uVersion;
133 GDBJITACTIONS enmAction;
134 GDBJITCODEENTRY *pRelevant;
135 GDBJITCODEENTRY *pHead;
136 /** Our addition: */
137 GDBJITCODEENTRY *pTail;
138} GDBJITDESCRIPTOR;
139
140/** GDB JIT: Our simple symbol file data. */
141typedef struct GDBJITSYMFILE
142{
143 Elf64_Ehdr EHdr;
144# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
145 Elf64_Shdr aShdrs[5];
146# else
147 Elf64_Shdr aShdrs[7];
148 Elf64_Phdr aPhdrs[2];
149# endif
150 /** The dwarf ehframe data for the chunk. */
151 uint8_t abEhFrame[512];
152 char szzStrTab[128];
153 Elf64_Sym aSymbols[3];
154# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
155 Elf64_Sym aDynSyms[2];
156 Elf64_Dyn aDyn[6];
157# endif
158} GDBJITSYMFILE;
159
160extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
161extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
162
163/** Init once for g_IemNativeGdbJitLock. */
164static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
165/** Init once for the critical section. */
166static RTCRITSECT g_IemNativeGdbJitLock;
167
168/** GDB reads the info here. */
169GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
170
171/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
172DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
173{
174 ASMNopPause();
175}
176
177/** @callback_method_impl{FNRTONCE} */
178static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
179{
180 RT_NOREF(pvUser);
181 return RTCritSectInit(&g_IemNativeGdbJitLock);
182}
183
184
185# endif /* IEMNATIVE_USE_GDB_JIT */
186
187/**
188 * Per-chunk unwind info for non-windows hosts.
189 */
190typedef struct IEMEXECMEMCHUNKEHFRAME
191{
192# ifdef IEMNATIVE_USE_LIBUNWIND
193 /** The offset of the FDA into abEhFrame. */
194 uintptr_t offFda;
195# else
196 /** 'struct object' storage area. */
197 uint8_t abObject[1024];
198# endif
199# ifdef IEMNATIVE_USE_GDB_JIT
200# if 0
201 /** The GDB JIT 'symbol file' data. */
202 GDBJITSYMFILE GdbJitSymFile;
203# endif
204 /** The GDB JIT list entry. */
205 GDBJITCODEENTRY GdbJitEntry;
206# endif
207 /** The dwarf ehframe data for the chunk. */
208 uint8_t abEhFrame[512];
209} IEMEXECMEMCHUNKEHFRAME;
210/** Pointer to per-chunk unwind info for non-windows hosts. */
211typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
212#endif
213
214
215/**
216 * A chunk of executable memory.
217 */
218typedef struct IEMEXECMEMCHUNK
219{
220 /** Number of free items in this chunk. */
221 uint32_t cFreeUnits;
222 /** Hint where to start searching for free space in the allocation bitmap. */
223 uint32_t idxFreeHint;
224 /** Pointer to the readable/writeable view of the memory chunk. */
225 void *pvChunkRw;
226 /** Pointer to the readable/executable view of the memory chunk. */
227 void *pvChunkRx;
228#ifdef IN_RING3
229 /**
230 * Pointer to the unwind information.
231 *
232 * This is used during C++ throw and longjmp (windows and probably most other
233 * platforms). Some debuggers (windbg) makes use of it as well.
234 *
235 * Windows: This is allocated from hHeap on windows because (at least for
236 * AMD64) the UNWIND_INFO structure address in the
237 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
238 *
239 * Others: Allocated from the regular heap to avoid unnecessary executable data
240 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
241 void *pvUnwindInfo;
242#elif defined(IN_RING0)
243 /** Allocation handle. */
244 RTR0MEMOBJ hMemObj;
245#endif
246} IEMEXECMEMCHUNK;
247/** Pointer to a memory chunk. */
248typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
249
250
251/**
252 * Executable memory allocator for the native recompiler.
253 */
254typedef struct IEMEXECMEMALLOCATOR
255{
256 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
257 uint32_t uMagic;
258
259 /** The chunk size. */
260 uint32_t cbChunk;
261 /** The maximum number of chunks. */
262 uint32_t cMaxChunks;
263 /** The current number of chunks. */
264 uint32_t cChunks;
265 /** Hint where to start looking for available memory. */
266 uint32_t idxChunkHint;
267 /** Statistics: Current number of allocations. */
268 uint32_t cAllocations;
269
270 /** The total amount of memory available. */
271 uint64_t cbTotal;
272 /** Total amount of free memory. */
273 uint64_t cbFree;
274 /** Total amount of memory allocated. */
275 uint64_t cbAllocated;
276
277 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
278 *
279 * Since the chunk size is a power of two and the minimum chunk size is a lot
280 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
281 * require a whole number of uint64_t elements in the allocation bitmap. So,
282 * for simplicity's sake, the bitmaps for all chunks are allocated as one
283 * continuous block of memory. */
284 uint64_t *pbmAlloc;
285 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
286 uint32_t cUnitsPerChunk;
287 /** Number of bitmap elements per chunk (for quickly locating the bitmap
288 * portion corresponding to a chunk). */
289 uint32_t cBitmapElementsPerChunk;
290
291#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
292 /** The next chunk to prune in. */
293 uint32_t idxChunkPrune;
294 /** The offset into the chunk at which to start pruning. */
295 uint32_t offChunkPrune;
296 /** Profiling the pruning code. */
297 STAMPROFILE StatPruneProf;
298 /** Number of bytes recovered by the pruning. */
299 STAMPROFILE StatPruneRecovered;
300#endif
301
302#ifdef VBOX_WITH_STATISTICS
303 STAMPROFILE StatAlloc;
304#endif
305
306
307#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
308 /** Pointer to the array of unwind info running parallel to aChunks (same
309 * allocation as this structure, located after the bitmaps).
310 * (For Windows, the structures must reside in 32-bit RVA distance to the
311 * actual chunk, so they are allocated off the chunk.) */
312 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
313#endif
314
315 /** The allocation chunks. */
316 RT_FLEXIBLE_ARRAY_EXTENSION
317 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
318} IEMEXECMEMALLOCATOR;
319/** Pointer to an executable memory allocator. */
320typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
321
322/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
323#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
324
325
326#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
327/**
328 * Allocation header.
329 */
330typedef struct IEMEXECMEMALLOCHDR
331{
332 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
333 uint32_t uMagic;
334 /** The allocation chunk (for speeding up freeing). */
335 uint32_t idxChunk;
336 /** Pointer to the translation block the allocation belongs to.
337 * This is the whole point of the header. */
338 PIEMTB pTb;
339} IEMEXECMEMALLOCHDR;
340/** Pointer to an allocation header. */
341typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
342/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
343# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
344#endif
345
346
347static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
348
349#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
350/**
351 * Frees up executable memory when we're out of space.
352 *
353 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
354 * space in a more linear fashion from the allocator's point of view. It may
355 * also defragment if implemented & enabled.
356 */
357static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
358{
359# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
360# error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
361# endif
362 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
363
364 /*
365 * Before we can start, we must process delayed frees.
366 */
367 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
368
369 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
370
371 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
372 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
373 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
374
375 uint32_t const cChunks = pExecMemAllocator->cChunks;
376 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
377 AssertReturnVoid(cChunks >= 1);
378
379 Assert(!pVCpu->iem.s.pCurTbR3);
380
381 /*
382 * Decide how much to prune. The chunk size is a power of two, so we'll be
383 * scanning a power of two sized range here as well.
384 */
385 uint32_t cbToPrune = cbChunk;
386
387 /* Never more than 25%. */
388 if (cChunks < 4)
389 cbToPrune /= cChunks == 1 ? 4 : 2;
390
391 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
392 if (cbToPrune > _4M)
393 cbToPrune = _4M;
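    /* (Illustrative note, editorial: with the default 64 MB chunks and four or
       more chunks allocated, each pruning call thus scans at most a 4 MB
       window; with a single 8 MB chunk the 25% rule above yields 2 MB.) */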
394
395 /*
396 * Adjust the pruning chunk and offset accordingly.
397 */
398 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
399 uint32_t offChunk = pExecMemAllocator->offChunkPrune;
400 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
401 if (offChunk >= cbChunk)
402 {
403 offChunk = 0;
404 idxChunk += 1;
405 }
406 if (idxChunk >= cChunks)
407 {
408 offChunk = 0;
409 idxChunk = 0;
410 }
411
412 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
413
414 /*
415 * Do the pruning. The current approach is the severe kind.
416 */
417 uint64_t cbPruned = 0;
418 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
419 while (offChunk < offPruneEnd)
420 {
421 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
422
423 /* Is this the start of an allocation block for a TB? (We typically have
424 one allocation at the start of each chunk for the unwind info where
425 pTb is NULL.) */
426 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC
427 && pHdr->pTb != NULL
428 && pHdr->idxChunk == idxChunk)
429 {
430 PIEMTB const pTb = pHdr->pTb;
431 AssertPtr(pTb);
432
433 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
434 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
435 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
436
437 iemTbAllocatorFree(pVCpu, pTb);
438
439 cbPruned += cbBlock;
440 offChunk += cbBlock;
441 }
442 else
443 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
444 }
445 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
446
447 /*
448 * Save the current pruning point.
449 */
450 pExecMemAllocator->offChunkPrune = offChunk;
451 pExecMemAllocator->idxChunkPrune = idxChunk;
452
453 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
454}
455#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
456
457
458/**
459 * Try allocate a block of @a cReqUnits in the chunk @a idxChunk.
460 */
461static void *
462iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
463 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb, void **ppvExec)
464{
465 /*
466 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
467 */
468 Assert(!(cToScan & 63));
469 Assert(!(idxFirst & 63));
470 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
471 pbmAlloc += idxFirst / 64;
472
473 /*
474 * Scan the bitmap for cReqUnits of consecutive clear bits.
475 */
476 /** @todo This can probably be done more efficiently for non-x86 systems. */
477 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
478 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
479 {
480 uint32_t idxAddBit = 1;
481 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
482 idxAddBit++;
483 if (idxAddBit >= cReqUnits)
484 {
485 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
486
487 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
488 pChunk->cFreeUnits -= cReqUnits;
489 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
490
491 pExecMemAllocator->cAllocations += 1;
492 uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
493 pExecMemAllocator->cbAllocated += cbReq;
494 pExecMemAllocator->cbFree -= cbReq;
495 pExecMemAllocator->idxChunkHint = idxChunk;
496
497 void * const pvMemRw = (uint8_t *)pChunk->pvChunkRw
498 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
499
500 /*
501 * Initialize the header and return.
502 */
503# ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
504 PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMemRw;
505 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
506 pHdr->idxChunk = idxChunk;
507 pHdr->pTb = pTb;
508
509 if (ppvExec)
510 *ppvExec = (uint8_t *)pChunk->pvChunkRx
511 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT)
512 + sizeof(*pHdr);
513
514 return pHdr + 1;
515#else
516 if (ppvExec)
517 *ppvExec = (uint8_t *)pChunk->pvChunkRx
518 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
519
520 RT_NOREF(pTb);
521 return pvMemRw;
522#endif
523 }
524
525 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
526 }
527 return NULL;
528}
529
530
531static void *
532iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb,
533 void **ppvExec)
534{
535 /*
536 * Figure out how much to allocate.
537 */
538#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
539 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
540#else
541 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
542#endif
543 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
544 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
545 {
546 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
547 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
548 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
549 {
550 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
551 pExecMemAllocator->cUnitsPerChunk - idxHint,
552 cReqUnits, idxChunk, pTb, ppvExec);
553 if (pvRet)
554 return pvRet;
555 }
556 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
557 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
558 cReqUnits, idxChunk, pTb, ppvExec);
559 }
560 return NULL;
561}
562
563
564/**
565 * Allocates @a cbReq bytes of executable memory.
566 *
567 * @returns Pointer to the readable/writeable memory, NULL if out of memory or other problem
568 * encountered.
569 * @param pVCpu The cross context virtual CPU structure of the calling
570 * thread.
571 * @param cbReq How many bytes are required.
572 * @param pTb The translation block that will be using the allocation.
573 * @param ppvExec Where to return the pointer to executable view of the allocated memory, optional.
574 */
575DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb, void **ppvExec) RT_NOEXCEPT
576{
577 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
578 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
579 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
580 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
581
582 for (unsigned iIteration = 0;; iIteration++)
583 {
584 if (cbReq <= pExecMemAllocator->cbFree)
585 {
586 uint32_t const cChunks = pExecMemAllocator->cChunks;
587 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
588 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
589 {
590 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb, ppvExec);
591 if (pvRet)
592 {
593 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
594 return pvRet;
595 }
596 }
597 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
598 {
599 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb, ppvExec);
600 if (pvRet)
601 {
602 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
603 return pvRet;
604 }
605 }
606 }
607
608 /*
609 * Can we grow it with another chunk?
610 */
611 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
612 {
613 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
614 AssertLogRelRCReturn(rc, NULL);
615
616 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
617 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb, ppvExec);
618 if (pvRet)
619 {
620 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
621 return pvRet;
622 }
623 AssertFailed();
624 }
625
626 /*
627 * Try prune native TBs once.
628 */
629 if (iIteration == 0)
630 {
631#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
632 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
633#else
634 /* No header included in the instruction count here. */
635 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
636 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
637#endif
638 }
639 else
640 {
641 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
642 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
643 return NULL;
644 }
645 }
646}
647
648
649/** This is a hook to ensure the instruction cache is properly flushed before the code in the memory
650 * given by @a pv and @a cb is executed. */
651DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
652{
653#ifdef RT_OS_DARWIN
654 /*
655 * Flush the instruction cache:
656 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
657 */
658 /* sys_dcache_flush(pv, cb); - not necessary */
659 sys_icache_invalidate(pv, cb);
660 RT_NOREF(pVCpu);
661#elif defined(RT_OS_LINUX)
662 RT_NOREF(pVCpu);
663
664 /* There is __builtin___clear_cache() but it flushes both the instruction and data cache, so do it manually. */
665 static uint32_t s_u32CtrEl0 = 0;
666 if (!s_u32CtrEl0)
667 asm volatile ("mrs %0, ctr_el0":"=r" (s_u32CtrEl0));
668 uintptr_t cbICacheLine = (uintptr_t)4 << (s_u32CtrEl0 & 0xf);
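    /* Illustrative note (editorial): CTR_EL0 bits [3:0] (IminLine) hold the
       log2 of the smallest instruction cache line size in words, so a typical
       value of 4 gives 4 << 4 = 64 byte lines for the invalidation loop below. */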
669
670 uintptr_t pb = (uintptr_t)pv & ~(cbICacheLine - 1);
671 for (; pb < (uintptr_t)pv + cb; pb += cbICacheLine)
672 asm volatile ("ic ivau, %0" : : "r" (pb) : "memory");
673
674 asm volatile ("dsb ish\n\t isb\n\t" : : : "memory");
675#else
676 RT_NOREF(pVCpu, pv, cb);
677#endif
678}
679
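/*
 * Illustrative usage sketch (editorial assumption, not part of the original
 * source): a caller emitting native code would typically pair the two views
 * returned by iemExecMemAllocatorAlloc() roughly like this, where pbEmittedCode
 * is a hypothetical buffer holding the freshly recompiled instructions:
 *
 *     void *pvExec = NULL;
 *     void *pvRw   = iemExecMemAllocatorAlloc(pVCpu, cbCode, pTb, &pvExec);
 *     if (pvRw)
 *     {
 *         memcpy(pvRw, pbEmittedCode, cbCode);                   // write via the RW view
 *         iemExecMemAllocatorReadyForUse(pVCpu, pvExec, cbCode); // flush the I-cache for the RX view
 *         // ... record pvExec in the TB and execute it later ...
 *     }
 */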
680
681/**
682 * Frees executable memory.
683 */
684DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
685{
686 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
687 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
688 AssertPtr(pv);
689#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
690 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
691
692 /* Align the size as we did when allocating the block. */
693 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
694
695#else
696 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
697 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
698 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
699 uint32_t const idxChunk = pHdr->idxChunk;
700 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
701 pv = pHdr;
702
703 /* Adjust and align the size to cover the whole allocation area. */
704 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
705#endif
706
707 /* Free it / assert sanity. */
708 bool fFound = false;
709 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
710#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
711 uint32_t const cChunks = pExecMemAllocator->cChunks;
712 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
713#endif
714 {
715 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
716 fFound = offChunk < cbChunk;
717 if (fFound)
718 {
719 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
720 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
721
722 /* Check that it's valid and free it. */
723 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
724 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
725 for (uint32_t i = 1; i < cReqUnits; i++)
726 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
727 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
728
729 /* Invalidate the header using the writeable memory view. */
730 pHdr = (PIEMEXECMEMALLOCHDR)((uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRw + offChunk);
731#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
732 pHdr->uMagic = 0;
733 pHdr->idxChunk = 0;
734 pHdr->pTb = NULL;
735#endif
736 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
737 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
738
739 /* Update the stats. */
740 pExecMemAllocator->cbAllocated -= cb;
741 pExecMemAllocator->cbFree += cb;
742 pExecMemAllocator->cAllocations -= 1;
743 return;
744 }
745 }
746 AssertFailed();
747}
748
749
750
751#ifdef IN_RING3
752# ifdef RT_OS_WINDOWS
753
754/**
755 * Initializes the unwind info structures for windows hosts.
756 */
757static int
758iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
759 void *pvChunk, uint32_t idxChunk)
760{
761 RT_NOREF(pVCpu);
762
763 /*
764 * The AMD64 unwind opcodes.
765 *
766 * This is a program that starts with RSP after a RET instruction that
767 * ends up in recompiled code, and the operations we describe here will
768 * restore all non-volatile registers and bring RSP back to where our
769 * RET address is. This means it's reverse order from what happens in
770 * the prologue.
771 *
772 * Note! Using a frame register approach here both because we have one
773 * and mainly because the UWOP_ALLOC_LARGE argument values
774 * would be a pain to write initializers for. On the positive
775 * side, we're impervious to changes in the stack variable
776 * area and can deal with dynamic stack allocations if necessary.
777 */
778 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
779 {
780 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 0x10 (0x60) */
781 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
782 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
783 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
784 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
785 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
786 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
787 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
788 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
789 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
790 };
791 union
792 {
793 IMAGE_UNWIND_INFO Info;
794 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
795 } s_UnwindInfo =
796 {
797 {
798 /* .Version = */ 1,
799 /* .Flags = */ 0,
800 /* .SizeOfProlog = */ 16, /* whatever */
801 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
802 /* .FrameRegister = */ X86_GREG_xBP,
803 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
804 }
805 };
806 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
807 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
808
809 /*
810 * Calc how much space we need and allocate it off the exec heap.
811 */
812 unsigned const cFunctionEntries = 1;
813 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
814 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
815 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
816 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL, NULL);
817 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
818 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
819
820 /*
821 * Initialize the structures.
822 */
823 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
824
825 paFunctions[0].BeginAddress = 0;
826 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
827 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
828
829 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
830 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
831
832 /*
833 * Register it.
834 */
835 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
836 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
837
838 return VINF_SUCCESS;
839}
840
841
842# else /* !RT_OS_WINDOWS */
843
844/**
845 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
846 */
847DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
848{
849 if (iValue >= 64)
850 {
851 Assert(iValue < 0x2000);
852 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
853 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
854 }
855 else if (iValue >= 0)
856 *Ptr.pb++ = (uint8_t)iValue;
857 else if (iValue > -64)
858 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
859 else
860 {
861 Assert(iValue > -0x2000);
862 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
863 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
864 }
865 return Ptr;
866}
867
868
869/**
870 * Emits an ULEB128 encoded value (up to 64-bit wide).
871 */
872DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
873{
874 while (uValue >= 0x80)
875 {
876 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
877 uValue >>= 7;
878 }
879 *Ptr.pb++ = (uint8_t)uValue;
880 return Ptr;
881}
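/* Illustrative encoding examples (editorial): iemDwarfPutUleb128(Ptr, 624485)
   emits the bytes 0xE5 0x8E 0x26, while iemDwarfPutLeb128(Ptr, -8), the data
   alignment factor used below, emits the single byte 0x78. */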
882
883
884/**
885 * Emits a CFA rule as register @a uReg + offset @a off.
886 */
887DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
888{
889 *Ptr.pb++ = DW_CFA_def_cfa;
890 Ptr = iemDwarfPutUleb128(Ptr, uReg);
891 Ptr = iemDwarfPutUleb128(Ptr, off);
892 return Ptr;
893}
894
895
896/**
897 * Emits a register (@a uReg) save location:
898 * CFA + @a off * data_alignment_factor
899 */
900DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
901{
902 if (uReg < 0x40)
903 *Ptr.pb++ = DW_CFA_offset | uReg;
904 else
905 {
906 *Ptr.pb++ = DW_CFA_offset_extended;
907 Ptr = iemDwarfPutUleb128(Ptr, uReg);
908 }
909 Ptr = iemDwarfPutUleb128(Ptr, off);
910 return Ptr;
911}
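/* Illustrative example (editorial): with the data alignment factor of -8 used
   in the CIE below, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) records the
   rule "RBP was saved at [CFA + 2 * -8]", i.e. 16 bytes below the CFA. */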
912
913
914# if 0 /* unused */
915/**
916 * Emits a register (@a uReg) save location, using signed offset:
917 * CFA + @a offSigned * data_alignment_factor
918 */
919DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
920{
921 *Ptr.pb++ = DW_CFA_offset_extended_sf;
922 Ptr = iemDwarfPutUleb128(Ptr, uReg);
923 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
924 return Ptr;
925}
926# endif
927
928
929/**
930 * Initializes the unwind info section for non-windows hosts.
931 */
932static int
933iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
934 void *pvChunk, uint32_t idxChunk)
935{
936 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
937 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
938
939 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
940
941 /*
942 * Generate the CIE first.
943 */
944# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
945 uint8_t const iDwarfVer = 3;
946# else
947 uint8_t const iDwarfVer = 4;
948# endif
949 RTPTRUNION const PtrCie = Ptr;
950 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
951 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
952 *Ptr.pb++ = iDwarfVer; /* DWARF version */
953 *Ptr.pb++ = 0; /* Augmentation. */
954 if (iDwarfVer >= 4)
955 {
956 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
957 *Ptr.pb++ = 0; /* Segment selector size. */
958 }
959# ifdef RT_ARCH_AMD64
960 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
961# else
962 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
963# endif
964 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
965# ifdef RT_ARCH_AMD64
966 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
967# elif defined(RT_ARCH_ARM64)
968 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
969# else
970# error "port me"
971# endif
972 /* Initial instructions: */
973# ifdef RT_ARCH_AMD64
974 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
975 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
976 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
977 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
978 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
979 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
980 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
981 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
982# elif defined(RT_ARCH_ARM64)
983# if 1
984 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
985# else
986 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
987# endif
988 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
989 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
990 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
991 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
992 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
993 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
994 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
995 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
996 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
997 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
998 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
999 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
1000 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1001 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
1002# else
1003# error "port me"
1004# endif
1005 while ((Ptr.u - PtrCie.u) & 3)
1006 *Ptr.pb++ = DW_CFA_nop;
1007 /* Finalize the CIE size. */
1008 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
1009
1010 /*
1011 * Generate an FDE for the whole chunk area.
1012 */
1013# ifdef IEMNATIVE_USE_LIBUNWIND
1014 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
1015# endif
1016 RTPTRUNION const PtrFde = Ptr;
1017 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
1018 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
1019 Ptr.pu32++;
1020 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
1021 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
1022# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
1023 *Ptr.pb++ = DW_CFA_nop;
1024# endif
1025 while ((Ptr.u - PtrFde.u) & 3)
1026 *Ptr.pb++ = DW_CFA_nop;
1027 /* Finalize the FDE size. */
1028 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
1029
1030 /* Terminator entry. */
1031 *Ptr.pu32++ = 0;
1032 *Ptr.pu32++ = 0; /* just to be sure... */
1033 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
1034
1035 /*
1036 * Register it.
1037 */
1038# ifdef IEMNATIVE_USE_LIBUNWIND
1039 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
1040# else
1041 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1042 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1043# endif
1044
1045# ifdef IEMNATIVE_USE_GDB_JIT
1046 /*
1047 * Now for telling GDB about this (experimental).
1048 *
1049 * This seems to work best with ET_DYN.
1050 */
1051 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
1052 sizeof(GDBJITSYMFILE), NULL, NULL);
1053 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1054 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1055
1056 RT_ZERO(*pSymFile);
1057
1058 /*
1059 * The ELF header:
1060 */
1061 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1062 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1063 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1064 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1065 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1066 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1067 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1068 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1069# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1070 pSymFile->EHdr.e_type = ET_DYN;
1071# else
1072 pSymFile->EHdr.e_type = ET_REL;
1073# endif
1074# ifdef RT_ARCH_AMD64
1075 pSymFile->EHdr.e_machine = EM_AMD64;
1076# elif defined(RT_ARCH_ARM64)
1077 pSymFile->EHdr.e_machine = EM_AARCH64;
1078# else
1079# error "port me"
1080# endif
1081 pSymFile->EHdr.e_version = 1; /*?*/
1082 pSymFile->EHdr.e_entry = 0;
1083# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1084 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1085# else
1086 pSymFile->EHdr.e_phoff = 0;
1087# endif
1088 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1089 pSymFile->EHdr.e_flags = 0;
1090 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1091# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1092 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1093 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1094# else
1095 pSymFile->EHdr.e_phentsize = 0;
1096 pSymFile->EHdr.e_phnum = 0;
1097# endif
1098 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1099 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1100 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1101
1102 uint32_t offStrTab = 0;
1103#define APPEND_STR(a_szStr) do { \
1104 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1105 offStrTab += sizeof(a_szStr); \
1106 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1107 } while (0)
1108#define APPEND_STR_FMT(a_szStr, ...) do { \
1109 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1110 offStrTab++; \
1111 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1112 } while (0)
1113
1114 /*
1115 * Section headers.
1116 */
1117 /* Section header #0: NULL */
1118 unsigned i = 0;
1119 APPEND_STR("");
1120 RT_ZERO(pSymFile->aShdrs[i]);
1121 i++;
1122
1123 /* Section header: .eh_frame */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".eh_frame");
1126 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1128# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1129 pSymFile->aShdrs[i].sh_offset
1130 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1131# else
1132 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1133 pSymFile->aShdrs[i].sh_offset = 0;
1134# endif
1135
1136 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1137 pSymFile->aShdrs[i].sh_link = 0;
1138 pSymFile->aShdrs[i].sh_info = 0;
1139 pSymFile->aShdrs[i].sh_addralign = 1;
1140 pSymFile->aShdrs[i].sh_entsize = 0;
1141 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1142 i++;
1143
1144 /* Section header: .shstrtab */
1145 unsigned const iShStrTab = i;
1146 pSymFile->EHdr.e_shstrndx = iShStrTab;
1147 pSymFile->aShdrs[i].sh_name = offStrTab;
1148 APPEND_STR(".shstrtab");
1149 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1150 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1151# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1152 pSymFile->aShdrs[i].sh_offset
1153 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1154# else
1155 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1156 pSymFile->aShdrs[i].sh_offset = 0;
1157# endif
1158 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1159 pSymFile->aShdrs[i].sh_link = 0;
1160 pSymFile->aShdrs[i].sh_info = 0;
1161 pSymFile->aShdrs[i].sh_addralign = 1;
1162 pSymFile->aShdrs[i].sh_entsize = 0;
1163 i++;
1164
1165 /* Section header: .symtab */
1166 pSymFile->aShdrs[i].sh_name = offStrTab;
1167 APPEND_STR(".symtab");
1168 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1169 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1170 pSymFile->aShdrs[i].sh_offset
1171 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1172 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1173 pSymFile->aShdrs[i].sh_link = iShStrTab;
1174 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1175 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1176 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1177 i++;
1178
1179# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1180 /* Section header: .dynsym */
1181 pSymFile->aShdrs[i].sh_name = offStrTab;
1182 APPEND_STR(".dynsym");
1183 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1184 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1185 pSymFile->aShdrs[i].sh_offset
1186 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1187 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1188 pSymFile->aShdrs[i].sh_link = iShStrTab;
1189 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1190 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1191 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1192 i++;
1193# endif
1194
1195# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1196 /* Section header: .dynamic */
1197 pSymFile->aShdrs[i].sh_name = offStrTab;
1198 APPEND_STR(".dynamic");
1199 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1200 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1201 pSymFile->aShdrs[i].sh_offset
1202 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1203 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1204 pSymFile->aShdrs[i].sh_link = iShStrTab;
1205 pSymFile->aShdrs[i].sh_info = 0;
1206 pSymFile->aShdrs[i].sh_addralign = 1;
1207 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1208 i++;
1209# endif
1210
1211 /* Section header: .text */
1212 unsigned const iShText = i;
1213 pSymFile->aShdrs[i].sh_name = offStrTab;
1214 APPEND_STR(".text");
1215 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1216 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1217# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1218 pSymFile->aShdrs[i].sh_offset
1219 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1220# else
1221 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1222 pSymFile->aShdrs[i].sh_offset = 0;
1223# endif
1224 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1225 pSymFile->aShdrs[i].sh_link = 0;
1226 pSymFile->aShdrs[i].sh_info = 0;
1227 pSymFile->aShdrs[i].sh_addralign = 1;
1228 pSymFile->aShdrs[i].sh_entsize = 0;
1229 i++;
1230
1231 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1232
1233# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1234 /*
1235 * The program headers:
1236 */
1237 /* Everything in a single LOAD segment: */
1238 i = 0;
1239 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1240 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1241 pSymFile->aPhdrs[i].p_offset
1242 = pSymFile->aPhdrs[i].p_vaddr
1243 = pSymFile->aPhdrs[i].p_paddr = 0;
1244 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1245 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1246 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1247 i++;
1248 /* The .dynamic segment. */
1249 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1250 pSymFile->aPhdrs[i].p_flags = PF_R;
1251 pSymFile->aPhdrs[i].p_offset
1252 = pSymFile->aPhdrs[i].p_vaddr
1253 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1254 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1255 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1256 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1257 i++;
1258
1259 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1260
1261 /*
1262 * The dynamic section:
1263 */
1264 i = 0;
1265 pSymFile->aDyn[i].d_tag = DT_SONAME;
1266 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1267 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1268 i++;
1269 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1270 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1271 i++;
1272 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1273 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1274 i++;
1275 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1276 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1277 i++;
1278 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1279 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1280 i++;
1281 pSymFile->aDyn[i].d_tag = DT_NULL;
1282 i++;
1283 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1284# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1285
1286 /*
1287 * Symbol tables:
1288 */
1289 /** @todo gdb doesn't seem to really like this ... */
1290 i = 0;
1291 pSymFile->aSymbols[i].st_name = 0;
1292 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1293 pSymFile->aSymbols[i].st_value = 0;
1294 pSymFile->aSymbols[i].st_size = 0;
1295 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1296 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1297# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1298 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1299# endif
1300 i++;
1301
1302 pSymFile->aSymbols[i].st_name = 0;
1303 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1304 pSymFile->aSymbols[i].st_value = 0;
1305 pSymFile->aSymbols[i].st_size = 0;
1306 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1307 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1308 i++;
1309
1310 pSymFile->aSymbols[i].st_name = offStrTab;
1311 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1312# if 0
1313 pSymFile->aSymbols[i].st_shndx = iShText;
1314 pSymFile->aSymbols[i].st_value = 0;
1315# else
1316 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1317 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1318# endif
1319 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1320 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1321 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1322# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1323 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1324 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1325# endif
1326 i++;
1327
1328 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1329 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1330
1331 /*
1332 * The GDB JIT entry and informing GDB.
1333 */
1334 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1335# if 1
1336 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1337# else
1338 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1339# endif
1340
1341 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1342 RTCritSectEnter(&g_IemNativeGdbJitLock);
1343 pEhFrame->GdbJitEntry.pNext = NULL;
1344 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1345 if (__jit_debug_descriptor.pTail)
1346 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1347 else
1348 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1349 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1350 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1351
1352 /* Notify GDB: */
1353 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1354 __jit_debug_register_code();
1355 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1356 RTCritSectLeave(&g_IemNativeGdbJitLock);
1357
1358# else /* !IEMNATIVE_USE_GDB_JIT */
1359 RT_NOREF(pVCpu);
1360# endif /* !IEMNATIVE_USE_GDB_JIT */
1361
1362 return VINF_SUCCESS;
1363}
1364
1365# endif /* !RT_OS_WINDOWS */
1366#endif /* IN_RING3 */
1367
1368
1369/**
1370 * Adds another chunk to the executable memory allocator.
1371 *
1372 * This is used by the init code for the initial allocation and later by the
1373 * regular allocator function when it's out of memory.
1374 */
1375static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1376{
1377 /* Check that we've room for growth. */
1378 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1379 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1380
1381 /* Allocate a chunk. */
1382#ifdef RT_OS_DARWIN
1383 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1384#else
1385 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1386#endif
1387 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1388
1389#ifdef RT_OS_DARWIN
1390 /*
1391 * Because it is impossible to have an RWX memory allocation on macOS, try to remap the memory
1392 * chunk readable/executable somewhere else, so we can spare ourselves the hassle of switching
1393 * between protections when executable memory is allocated.
1394 */
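    /* Editorial note: mach_vm_remap() creates a second mapping of the same
       physical pages, so anything written through the RW view (pvChunk) is
       immediately visible through the RX view (AddrRemapped); only the
       instruction cache flush done by iemExecMemAllocatorReadyForUse() is
       required before executing freshly emitted code. */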
1395 mach_port_t hPortTask = mach_task_self();
1396 mach_vm_address_t AddrChunk = (mach_vm_address_t)pvChunk;
1397 mach_vm_address_t AddrRemapped = 0;
1398 vm_prot_t ProtCur, ProtMax;
1399 kern_return_t krc = mach_vm_remap(hPortTask, &AddrRemapped, pExecMemAllocator->cbChunk, 0,
1400 VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
1401 hPortTask, AddrChunk, FALSE, &ProtCur, &ProtMax,
1402 VM_INHERIT_NONE);
1403 if (krc != KERN_SUCCESS)
1404 {
1405 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1406 AssertLogRelFailed();
1407 return VERR_NO_EXEC_MEMORY;
1408 }
1409
1410 krc = mach_vm_protect(mach_task_self(), AddrRemapped, pExecMemAllocator->cbChunk, FALSE, VM_PROT_READ | VM_PROT_EXECUTE);
1411 if (krc != KERN_SUCCESS)
1412 {
1413 krc = mach_vm_deallocate(hPortTask, AddrRemapped, pExecMemAllocator->cbChunk);
1414 Assert(krc == KERN_SUCCESS);
1415 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1416 AssertLogRelFailed();
1417 return VERR_NO_EXEC_MEMORY;
1418 }
1419
1420 void *pvChunkRx = (void *)AddrRemapped;
1421#else
1422 void *pvChunkRx = pvChunk;
1423#endif
1424
1425 /*
1426 * Add the chunk.
1427 *
1428 * This must be done before the unwind init so windows can allocate
1429 * memory from the chunk when using the alternative sub-allocator.
1430 */
1431 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = pvChunk;
1432 pExecMemAllocator->aChunks[idxChunk].pvChunkRx = pvChunkRx;
1433#ifdef IN_RING3
1434 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1435#endif
1436 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1437 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1438 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1439 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1440
1441 pExecMemAllocator->cChunks = idxChunk + 1;
1442 pExecMemAllocator->idxChunkHint = idxChunk;
1443
1444 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1445 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1446
1447#ifdef IN_RING3
1448 /*
1449 * Initialize the unwind information (this cannot really fail atm).
1450 * (This sets pvUnwindInfo.)
1451 */
1452 int rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunkRx, idxChunk);
1453 if (RT_SUCCESS(rc))
1454 { /* likely */ }
1455 else
1456 {
1457 /* Just in case the impossible happens, undo the above: */
1458 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1459 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1460 pExecMemAllocator->cChunks = idxChunk;
1461 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1462 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1463 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = NULL;
1464 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1465
1466#ifdef RT_OS_DARWIN
1467 krc = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx,
1468 pExecMemAllocator->cbChunk);
1469 Assert(krc == KERN_SUCCESS);
1470#endif
1471
1472 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1473 return rc;
1474 }
1475#endif
1476 return VINF_SUCCESS;
1477}
1478
1479
1480/**
1481 * Initializes the executable memory allocator for native recompilation on the
1482 * calling EMT.
1483 *
1484 * @returns VBox status code.
1485 * @param pVCpu The cross context virtual CPU structure of the calling
1486 * thread.
1487 * @param cbMax The max size of the allocator.
1488 * @param cbInitial The initial allocator size.
1489 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1490 * dependent).
1491 */
1492int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1493{
1494 /*
1495 * Validate input.
1496 */
1497 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1498 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1499 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1500 || cbChunk == 0
1501 || ( RT_IS_POWER_OF_TWO(cbChunk)
1502 && cbChunk >= _1M
1503 && cbChunk <= _256M
1504 && cbChunk <= cbMax),
1505 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1506 VERR_OUT_OF_RANGE);
1507
1508 /*
1509 * Adjust/figure out the chunk size.
1510 */
1511 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1512 {
1513 if (cbMax >= _256M)
1514 cbChunk = _64M;
1515 else
1516 {
1517 if (cbMax < _16M)
1518 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1519 else
1520 cbChunk = (uint32_t)cbMax / 4;
1521 if (!RT_IS_POWER_OF_TWO(cbChunk))
1522 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1523 }
1524 }
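    /* (Illustrative, editorial: a cbMax of 512 MB yields 64 MB chunks and thus
       8 chunks, while a cbMax of 64 MB yields 16 MB chunks and 4 chunks.) */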
1525
1526 if (cbChunk > cbMax)
1527 cbMax = cbChunk;
1528 else
1529 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1530 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1531 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1532
1533 /*
1534 * Allocate and initialize the allocator instance.
1535 */
1536 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
1537 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
1538 size_t cbNeeded = offBitmaps + cbBitmaps;
1539 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1540 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1541#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1542 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1543 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1544#endif
1545 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1546 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1547 VERR_NO_MEMORY);
1548 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1549 pExecMemAllocator->cbChunk = cbChunk;
1550 pExecMemAllocator->cMaxChunks = cMaxChunks;
1551 pExecMemAllocator->cChunks = 0;
1552 pExecMemAllocator->idxChunkHint = 0;
1553 pExecMemAllocator->cAllocations = 0;
1554 pExecMemAllocator->cbTotal = 0;
1555 pExecMemAllocator->cbFree = 0;
1556 pExecMemAllocator->cbAllocated = 0;
1557 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1558 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1559 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
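    /* Illustrative note (editorial): a 64 MB chunk with 256 byte units has
       262144 units per chunk and thus 262144 / 64 = 4096 uint64_t bitmap
       elements (32 KB) per chunk. */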
1560 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
1561#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1562 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1563#endif
1564 for (uint32_t i = 0; i < cMaxChunks; i++)
1565 {
1566 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1567 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1568 pExecMemAllocator->aChunks[i].pvChunkRw = NULL;
1569#ifdef IN_RING0
1570 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1571#else
1572 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1573#endif
1574 }
1575 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1576
1577 /*
1578 * Do the initial allocations.
1579 */
1580 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1581 {
1582 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1583 AssertLogRelRCReturn(rc, rc);
1584 }
1585
1586 pExecMemAllocator->idxChunkHint = 0;
1587
1588 /*
1589 * Register statistics.
1590 */
1591 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1592 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1593 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1594 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1595 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1596 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1597 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1598 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1599 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1600 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1601 "Number of bytes current allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1602 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1603 "Number of bytes current free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1604 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1605 "Total number of byte", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1606#ifdef VBOX_WITH_STATISTICS
1607 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1608 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1609#endif
1610#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
1611 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1612 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
1613 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
1614 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
1615#endif
1616
1617 return VINF_SUCCESS;
1618}
1619