VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp@ 106212

Last change on this file since 106212 was 106128, checked in by vboxsync, 2 months ago

VMM/IEM: Docs. bugref:10720

1/* $Id: IEMAllN8veExecMem.cpp 106128 2024-09-23 22:53:44Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, Executable Memory Allocator.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
35#include <VBox/vmm/iem.h>
36#include <VBox/vmm/cpum.h>
37#include "IEMInternal.h"
38#include <VBox/vmm/vmcc.h>
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <VBox/param.h>
42#include <iprt/assert.h>
43#include <iprt/mem.h>
44#include <iprt/string.h>
45#if defined(RT_ARCH_AMD64)
46# include <iprt/x86.h>
47#elif defined(RT_ARCH_ARM64)
48# include <iprt/armv8.h>
49#endif
50
51#ifdef RT_OS_WINDOWS
52# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
53extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
54extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
55#else
56# include <iprt/formats/dwarf.h>
57# if defined(RT_OS_DARWIN)
58# include <libkern/OSCacheControl.h>
59# include <mach/mach.h>
60# include <mach/mach_vm.h>
61# define IEMNATIVE_USE_LIBUNWIND
62extern "C" void __register_frame(const void *pvFde);
63extern "C" void __deregister_frame(const void *pvFde);
64# else
65# ifdef DEBUG_bird /** @todo not thread safe yet */
66# define IEMNATIVE_USE_GDB_JIT
67# endif
68# ifdef IEMNATIVE_USE_GDB_JIT
69# include <iprt/critsect.h>
70# include <iprt/once.h>
71# include <iprt/formats/elf64.h>
72# endif
73extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
74extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
75# endif
76#endif
77
78#include "IEMN8veRecompiler.h"
79
80
81/*********************************************************************************************************************************
82* Executable Memory Allocator *
83*********************************************************************************************************************************/
84/** The chunk sub-allocation unit size in bytes. */
85#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 256
86/** The chunk sub-allocation unit size as a shift factor. */
87#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 8
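/* Note: the two defines above must stay in sync, i.e. IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) (256 == 1 << 8). */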
88/** Enables adding a header to the sub-allocator allocations.
89 * This is useful for freeing up executable memory among other things. */
90#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
91/** Use alternative pruning. */
92#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
93
94
95#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
96# ifdef IEMNATIVE_USE_GDB_JIT
97# define IEMNATIVE_USE_GDB_JIT_ET_DYN
98
99/** GDB JIT: Code entry. */
100typedef struct GDBJITCODEENTRY
101{
102 struct GDBJITCODEENTRY *pNext;
103 struct GDBJITCODEENTRY *pPrev;
104 uint8_t *pbSymFile;
105 uint64_t cbSymFile;
106} GDBJITCODEENTRY;
107
108/** GDB JIT: Actions. */
109typedef enum GDBJITACTIONS : uint32_t
110{
111 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
112} GDBJITACTIONS;
113
114/** GDB JIT: Descriptor. */
115typedef struct GDBJITDESCRIPTOR
116{
117 uint32_t uVersion;
118 GDBJITACTIONS enmAction;
119 GDBJITCODEENTRY *pRelevant;
120 GDBJITCODEENTRY *pHead;
121 /** Our addition: */
122 GDBJITCODEENTRY *pTail;
123} GDBJITDESCRIPTOR;
124
125/** GDB JIT: Our simple symbol file data. */
126typedef struct GDBJITSYMFILE
127{
128 Elf64_Ehdr EHdr;
129# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
130 Elf64_Shdr aShdrs[5];
131# else
132 Elf64_Shdr aShdrs[7];
133 Elf64_Phdr aPhdrs[2];
134# endif
135 /** The dwarf ehframe data for the chunk. */
136 uint8_t abEhFrame[512];
137 char szzStrTab[128];
138 Elf64_Sym aSymbols[3];
139# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
140 Elf64_Sym aDynSyms[2];
141 Elf64_Dyn aDyn[6];
142# endif
143} GDBJITSYMFILE;
144
145extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
146extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
147
148/** Init once for g_IemNativeGdbJitLock. */
149static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
150/** Critical section protecting the GDB JIT descriptor list. */
151static RTCRITSECT g_IemNativeGdbJitLock;
152
153/** GDB reads the info here. */
154GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
155
156/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
157DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
158{
159 ASMNopPause();
160}
161
162/** @callback_method_impl{FNRTONCE} */
163static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
164{
165 RT_NOREF(pvUser);
166 return RTCritSectInit(&g_IemNativeGdbJitLock);
167}
168
169
170# endif /* IEMNATIVE_USE_GDB_JIT */
171
172/**
173 * Per-chunk unwind info for non-windows hosts.
174 */
175typedef struct IEMEXECMEMCHUNKEHFRAME
176{
177# ifdef IEMNATIVE_USE_LIBUNWIND
178 /** The offset of the FDA into abEhFrame. */
179 uintptr_t offFda;
180# else
181 /** 'struct object' storage area. */
182 uint8_t abObject[1024];
183# endif
184# ifdef IEMNATIVE_USE_GDB_JIT
185# if 0
186 /** The GDB JIT 'symbol file' data. */
187 GDBJITSYMFILE GdbJitSymFile;
188# endif
189 /** The GDB JIT list entry. */
190 GDBJITCODEENTRY GdbJitEntry;
191# endif
192 /** The dwarf ehframe data for the chunk. */
193 uint8_t abEhFrame[512];
194} IEMEXECMEMCHUNKEHFRAME;
195/** Pointer to the per-chunk unwind info for non-windows hosts. */
196typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
197#endif
198
199
200/**
201 * A chunk of executable memory.
202 */
203typedef struct IEMEXECMEMCHUNK
204{
205 /** Number of free items in this chunk. */
206 uint32_t cFreeUnits;
207 /** Hint where to start searching for free space in the allocation bitmap. */
208 uint32_t idxFreeHint;
209 /** Pointer to the readable/writeable view of the memory chunk. */
210 void *pvChunkRw;
211 /** Pointer to the readable/executable view of the memory chunk. */
212 void *pvChunkRx;
213 /** Pointer to the context structure detailing the per chunk common code. */
214 PCIEMNATIVEPERCHUNKCTX pCtx;
215#ifdef IN_RING3
216 /**
217 * Pointer to the unwind information.
218 *
219 * This is used during C++ throw and longjmp (windows and probably most other
220 * platforms). Some debuggers (windbg) make use of it as well.
221 *
222 * Windows: This is allocated from hHeap on windows because (at least for
223 * AMD64) the UNWIND_INFO structure address in the
224 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
225 *
226 * Others: Allocated from the regular heap to avoid unnecessary executable data
227 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
228 void *pvUnwindInfo;
229#elif defined(IN_RING0)
230 /** Allocation handle. */
231 RTR0MEMOBJ hMemObj;
232#endif
233} IEMEXECMEMCHUNK;
234/** Pointer to a memory chunk. */
235typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
236
237
238/**
239 * Executable memory allocator for the native recompiler.
240 */
241typedef struct IEMEXECMEMALLOCATOR
242{
243 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
244 uint32_t uMagic;
245
246 /** The chunk size. */
247 uint32_t cbChunk;
248 /** The maximum number of chunks. */
249 uint32_t cMaxChunks;
250 /** The current number of chunks. */
251 uint32_t cChunks;
252 /** Hint where to start looking for available memory. */
253 uint32_t idxChunkHint;
254 /** Statistics: Current number of allocations. */
255 uint32_t cAllocations;
256
257 /** The total amount of memory available. */
258 uint64_t cbTotal;
259 /** Total amount of free memory. */
260 uint64_t cbFree;
261 /** Total amount of memory allocated. */
262 uint64_t cbAllocated;
263
264 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
265 *
266 * Since the chunk size is a power of two and the minimum chunk size is a lot
267 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
268 * require a whole number of uint64_t elements in the allocation bitmap. So,
269 * for the sake of simplicity/laziness, they are allocated as one
270 * continuous chunk. */
271 uint64_t *pbmAlloc;
272 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
273 uint32_t cUnitsPerChunk;
274 /** Number of bitmap elements per chunk (for quickly locating the bitmap
275 * portion corresponding to a chunk). */
276 uint32_t cBitmapElementsPerChunk;
277
278#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
279 /** The next chunk to prune in. */
280 uint32_t idxChunkPrune;
281 /** The offset within the chunk to start pruning at. */
282 uint32_t offChunkPrune;
283 /** Profiling the pruning code. */
284 STAMPROFILE StatPruneProf;
285 /** Number of bytes recovered by the pruning. */
286 STAMPROFILE StatPruneRecovered;
287#endif
288
289#ifdef VBOX_WITH_STATISTICS
290 STAMPROFILE StatAlloc;
291 /** Total amount of memory currently not usable due to IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE rounding. */
292 uint64_t cbUnusable;
293#endif
294
295
296#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
297 /** Pointer to the array of unwind info running parallel to aChunks (same
298 * allocation as this structure, located after the bitmaps).
299 * (For Windows, the structures must reside within 32-bit RVA distance of the
300 * actual chunk, so they are allocated off the chunk.) */
301 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
302#endif
303
304 /** The allocation chunks. */
305 RT_FLEXIBLE_ARRAY_EXTENSION
306 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
307} IEMEXECMEMALLOCATOR;
308/** Pointer to an executable memory allocator. */
309typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
310
311/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
312#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
313
314
315#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
316/**
317 * Allocation header.
318 */
319typedef struct IEMEXECMEMALLOCHDR
320{
321 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
322 uint32_t uMagic;
323 /** The allocation chunk (for speeding up freeing). */
324 uint32_t idxChunk;
325 /** Pointer to the translation block the allocation belongs to.
326 * This is the whole point of the header. */
327 PIEMTB pTb;
328} IEMEXECMEMALLOCHDR;
329/** Pointer to an allocation header. */
330typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
331/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
332# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
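/* 0x4d657845 reads as 'ExeM' when viewed as little-endian ASCII bytes (0x45='E', 0x78='x', 0x65='e', 0x4d='M'). */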
333#endif
334
335
336static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
337
338
339#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
340/**
341 * Frees up executable memory when we're out of space.
342 *
343 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
344 * space in a more linear fashion from the allocator's point of view. It may
345 * also defragment if implemented & enabled.
346 */
347static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
348{
349# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
350# error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
351# endif
352 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
353
354 /*
355 * Before we can start, we must process delayed frees.
356 */
357 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
358
359 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
360
361 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
362 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
363 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
364
365 uint32_t const cChunks = pExecMemAllocator->cChunks;
366 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
367 AssertReturnVoid(cChunks >= 1);
368
369 Assert(!pVCpu->iem.s.pCurTbR3);
370
371 /*
372 * Decide how much to prune. The chunk size is a power of two, so we'll be
373 * scanning a power of two here as well.
374 */
375 uint32_t cbToPrune = cbChunk;
376
377 /* Never more than 25%. */
378 if (cChunks < 4)
379 cbToPrune /= cChunks == 1 ? 4 : 2;
380
381 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
382 if (cbToPrune > _4M)
383 cbToPrune = _4M;
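    /* Example: with a single 64M chunk this comes to 64M/4 = 16M, which the 4M cap above then reduces to 4M per call. */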
384
385 /*
386 * Adjust the pruning chunk and offset accordingly.
387 */
388 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
389 uint32_t offChunk = pExecMemAllocator->offChunkPrune;
390 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
391 if (offChunk >= cbChunk)
392 {
393 offChunk = 0;
394 idxChunk += 1;
395 }
396 if (idxChunk >= cChunks)
397 {
398 offChunk = 0;
399 idxChunk = 0;
400 }
401
402 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
403
404 /*
405 * Do the pruning. The current approach is the severe kind.
406 */
407 uint64_t cbPruned = 0;
408 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
409 while (offChunk < offPruneEnd)
410 {
411 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
412
413 /* Is this the start of an allocation block for a TB? (We typically have
414 one allocation at the start of each chunk for the unwind info where
415 pTb is NULL.) */
416 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC
417 && pHdr->pTb != NULL
418 && pHdr->idxChunk == idxChunk)
419 {
420 PIEMTB const pTb = pHdr->pTb;
421 AssertPtr(pTb);
422
423 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
424 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
425 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
426
427 iemTbAllocatorFree(pVCpu, pTb);
428
429 cbPruned += cbBlock;
430 offChunk += cbBlock;
431 }
432 else
433 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
434 }
435 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
436
437 /*
438 * Save the current pruning point.
439 */
440 pExecMemAllocator->offChunkPrune = offChunk;
441 pExecMemAllocator->idxChunkPrune = idxChunk;
442
443 /* Set the hint to the start of the pruned region. */
444 pExecMemAllocator->idxChunkHint = idxChunk;
445 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = offChunk / IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
446
447 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
448}
449#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
450
451
452/**
453 * Try allocate a block of @a cReqUnits in the chunk @a idxChunk.
454 */
455static void *
456iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
457 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb,
458 void **ppvExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
459{
460 /*
461 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
462 */
463 Assert(!(cToScan & 63));
464 Assert(!(idxFirst & 63));
465 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
466 pbmAlloc += idxFirst / 64;
467
468 /*
469 * Scan the bitmap for cReqUnits consecutive clear bits.
470 */
471 /** @todo This can probably be done more efficiently for non-x86 systems. */
472 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
473 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
474 {
475 uint32_t idxAddBit = 1;
476 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
477 idxAddBit++;
478 if (idxAddBit >= cReqUnits)
479 {
480 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
481
482 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
483 pChunk->cFreeUnits -= cReqUnits;
484 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
485
486 pExecMemAllocator->cAllocations += 1;
487 uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
488 pExecMemAllocator->cbAllocated += cbReq;
489 pExecMemAllocator->cbFree -= cbReq;
490 pExecMemAllocator->idxChunkHint = idxChunk;
491
492 void * const pvMemRw = (uint8_t *)pChunk->pvChunkRw
493 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
494
495 if (ppChunkCtx)
496 *ppChunkCtx = pChunk->pCtx;
497
498 /*
499 * Initialize the header and return.
500 */
501# ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
502 PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMemRw;
503 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
504 pHdr->idxChunk = idxChunk;
505 pHdr->pTb = pTb;
506
507 if (ppvExec)
508 *ppvExec = (uint8_t *)pChunk->pvChunkRx
509 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT)
510 + sizeof(*pHdr);
511
512 return pHdr + 1;
513#else
514 if (ppvExec)
515 *ppvExec = (uint8_t *)pChunk->pvChunkRx
516 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
517
518 RT_NOREF(pTb);
519 return pvMemRw;
520#endif
521 }
522
523 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
524 }
525 return NULL;
526}
527
528
529static PIEMNATIVEINSTR
530iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb,
531 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
532{
533 /*
534 * Figure out how much to allocate.
535 */
536#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
537 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
538#else
539 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
540#endif
541 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
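    /* E.g. on a 64-bit host with the allocation header enabled, a 1000 byte request plus the 16 byte header needs 1016 bytes, i.e. cReqUnits = 4 (1024 bytes). */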
542 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
543 {
544 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
545 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
546 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
547 {
548 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
549 pExecMemAllocator->cUnitsPerChunk - idxHint,
550 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
551 if (pvRet)
552 {
553#ifdef VBOX_WITH_STATISTICS
554 pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq;
555#endif
556 return (PIEMNATIVEINSTR)pvRet;
557 }
558 }
559 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
560 RT_MIN(pExecMemAllocator->cUnitsPerChunk,
561 RT_ALIGN_32(idxHint + cReqUnits, 64)),
562 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
563#ifdef VBOX_WITH_STATISTICS
564 if (pvRet)
565 pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq;
566#endif
567 return (PIEMNATIVEINSTR)pvRet;
568 }
569 return NULL;
570}
571
572
573/**
574 * Allocates @a cbReq bytes of executable memory.
575 *
576 * @returns Pointer to the readable/writeable memory, NULL if out of memory or
577 * another problem was encountered.
578 * @param pVCpu The cross context virtual CPU structure of the
579 * calling thread.
580 * @param cbReq How many bytes are required.
581 * @param pTb The translation block that will be using the allocation.
582 * @param ppaExec Where to return the pointer to executable view of
583 * the allocated memory, optional.
584 * @param ppChunkCtx Where to return the per chunk attached context
585 * if available, optional.
586 */
587DECLHIDDEN(PIEMNATIVEINSTR) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb,
588 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx) RT_NOEXCEPT
589{
590 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
591 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
592 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
593 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
594
595 for (unsigned iIteration = 0;; iIteration++)
596 {
597 if (cbReq <= pExecMemAllocator->cbFree)
598 {
599 uint32_t const cChunks = pExecMemAllocator->cChunks;
600 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
601 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
602 {
603 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
604 ppaExec, ppChunkCtx);
605 if (pRet)
606 {
607 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
608 return pRet;
609 }
610 }
611 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
612 {
613 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
614 ppaExec, ppChunkCtx);
615 if (pRet)
616 {
617 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
618 return pRet;
619 }
620 }
621 }
622
623 /*
624 * Can we grow it with another chunk?
625 */
626 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
627 {
628 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
629 AssertLogRelRCReturn(rc, NULL);
630
631 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
632 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
633 ppaExec, ppChunkCtx);
634 if (pRet)
635 {
636 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
637 return pRet;
638 }
639 AssertFailed();
640 }
641
642 /*
643 * Try prune native TBs once.
644 */
645 if (iIteration == 0)
646 {
647#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
648 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
649#else
650 /* No header included in the instruction count here. */
651 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
652 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
653#endif
654 }
655 else
656 {
657 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
658 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
659 return NULL;
660 }
661 }
662}
663
664
665/** This is a hook to ensure the instruction cache is properly flushed before the code in the memory
666 * given by @a pv and @a cb is executed. */
667DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
668{
669#ifdef RT_OS_DARWIN
670 /*
671 * Flush the instruction cache:
672 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
673 */
674 /* sys_dcache_flush(pv, cb); - not necessary */
675 sys_icache_invalidate(pv, cb);
676 RT_NOREF(pVCpu);
677
678#elif defined(RT_OS_LINUX) && defined(RT_ARCH_ARM64)
679 RT_NOREF(pVCpu);
680
681 /* There is __builtin___clear_cache() but it flushes both the instruction and data cache, so do it manually. */
682 static uint32_t s_u32CtrEl0 = 0;
683 if (!s_u32CtrEl0)
684 asm volatile ("mrs %0, ctr_el0":"=r" (s_u32CtrEl0));
685 uintptr_t cbICacheLine = (uintptr_t)4 << (s_u32CtrEl0 & 0xf);
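    /* CTR_EL0 bits 3:0 (IminLine) give the log2 of the smallest I-cache line size in 4-byte words, so e.g. a value of 4 yields a 64 byte line. */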
686
687 uintptr_t pb = (uintptr_t)pv & ~(cbICacheLine - 1);
688 for (; pb < (uintptr_t)pv + cb; pb += cbICacheLine)
689 asm volatile ("ic ivau, %0" : : "r" (pb) : "memory");
690
691 asm volatile ("dsb ish\n\t isb\n\t" : : : "memory");
692
693#else
694 RT_NOREF(pVCpu, pv, cb);
695#endif
696}
697
698
699/**
700 * Frees executable memory.
701 */
702DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
703{
704 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
705 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
706 AssertPtr(pv);
707#ifdef VBOX_WITH_STATISTICS
708 size_t const cbOrig = cb;
709#endif
710#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
711 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
712
713 /* Align the size as we did when allocating the block. */
714 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
715
716#else
717 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
718 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
719 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
720 uint32_t const idxChunk = pHdr->idxChunk;
721 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
722 pv = pHdr;
723
724 /* Adjust and align the size to cover the whole allocation area. */
725 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
726#endif
727
728 /* Free it / assert sanity. */
729 bool fFound = false;
730 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
731#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
732 uint32_t const cChunks = pExecMemAllocator->cChunks;
733 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
734#endif
735 {
736 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
737 fFound = offChunk < cbChunk;
738 if (fFound)
739 {
740 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
741 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
742
743 /* Check that it's valid and free it. */
744 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
745 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
746 for (uint32_t i = 1; i < cReqUnits; i++)
747 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
748 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
749
750 /* Invalidate the header using the writeable memory view. */
751 pHdr = (PIEMEXECMEMALLOCHDR)((uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRw + offChunk);
752#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
753 pHdr->uMagic = 0;
754 pHdr->idxChunk = 0;
755 pHdr->pTb = NULL;
756#endif
757 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
758 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
759
760 /* Update the stats. */
761 pExecMemAllocator->cbAllocated -= cb;
762 pExecMemAllocator->cbFree += cb;
763 pExecMemAllocator->cAllocations -= 1;
764#ifdef VBOX_WITH_STATISTICS
765 pExecMemAllocator->cbUnusable -= (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbOrig;
766#endif
767 return;
768 }
769 }
770 AssertFailed();
771}
772
773
774/**
775 * Interface used by iemNativeRecompileAttachExecMemChunkCtx and unwind info
776 * generators.
777 */
778DECLHIDDEN(PIEMNATIVEINSTR)
779iemExecMemAllocatorAllocFromChunk(PVMCPU pVCpu, uint32_t idxChunk, uint32_t cbReq, PIEMNATIVEINSTR *ppaExec)
780{
781 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
782 AssertReturn(idxChunk < pExecMemAllocator->cChunks, NULL);
783 Assert(cbReq < _1M);
784 return iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, NULL /*pTb*/, ppaExec, NULL /*ppChunkCtx*/);
785}
786
787
788/**
789 * For getting the per-chunk context detailing common code for a TB.
790 *
791 * This is for use by the disassembler.
792 */
793DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemExecMemGetTbChunkCtx(PVMCPU pVCpu, PCIEMTB pTb)
794{
795 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
796 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
797 {
798 uintptr_t const uAddress = (uintptr_t)pTb->Native.paInstructions;
799 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
800 uint32_t idxChunk = pExecMemAllocator->cChunks;
801 while (idxChunk-- > 0)
802 if (uAddress - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx < cbChunk)
803 return pExecMemAllocator->aChunks[idxChunk].pCtx;
804 }
805 return NULL;
806}
807
808
809#ifdef IN_RING3
810# ifdef RT_OS_WINDOWS
811
812/**
813 * Initializes the unwind info structures for windows hosts.
814 */
815static int
816iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
817 void *pvChunk, uint32_t idxChunk)
818{
819 RT_NOREF(pVCpu);
820
821 /*
822 * The AMD64 unwind opcodes.
823 *
824 * This is a program that starts with RSP after a RET instruction that
825 * ends up in recompiled code, and the operations we describe here will
826 * restore all non-volatile registers and bring RSP back to where our
827 * RET address is. This means it's reverse order from what happens in
828 * the prologue.
829 *
830 * Note! Using a frame register approach here both because we have one
831 * and mainly because the UWOP_ALLOC_LARGE argument values
832 * would be a pain to write initializers for. On the positive
833 * side, we're impervious to changes in the stack variable
834 * area and can deal with dynamic stack allocations if necessary.
835 */
836 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
837 {
838 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
839 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
840 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
841 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
842 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
843 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
844 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
845 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
846 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
847 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
848 };
849 union
850 {
851 IMAGE_UNWIND_INFO Info;
852 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
853 } s_UnwindInfo =
854 {
855 {
856 /* .Version = */ 1,
857 /* .Flags = */ 0,
858 /* .SizeOfProlog = */ 16, /* whatever */
859 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
860 /* .FrameRegister = */ X86_GREG_xBP,
861 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
862 }
863 };
864 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
865 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
866
867 /*
868 * Calc how much space we need and allocate it off the exec heap.
869 */
870 unsigned const cFunctionEntries = 1;
871 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
872 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
873 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
874 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL, NULL, NULL);
875 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
876 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
877
878 /*
879 * Initialize the structures.
880 */
881 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
882
883 paFunctions[0].BeginAddress = 0;
884 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
885 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
886
887 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
888 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
889
890 /*
891 * Register it.
892 */
893 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
894 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
895
896 return VINF_SUCCESS;
897}
898
899
900# else /* !RT_OS_WINDOWS */
901
902/**
903 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
904 */
905DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
906{
907 if (iValue >= 64)
908 {
909 Assert(iValue < 0x2000);
910 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
911 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
912 }
913 else if (iValue >= 0)
914 *Ptr.pb++ = (uint8_t)iValue;
915 else if (iValue > -64)
916 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
917 else
918 {
919 Assert(iValue > -0x2000);
920 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
921 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
922 }
923 return Ptr;
924}
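/* E.g. iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78, while a value of 100 is encoded as the two bytes 0xe4 0x00. */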
925
926
927/**
928 * Emits an ULEB128 encoded value (up to 64-bit wide).
929 */
930DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
931{
932 while (uValue >= 0x80)
933 {
934 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
935 uValue >>= 7;
936 }
937 *Ptr.pb++ = (uint8_t)uValue;
938 return Ptr;
939}
940
941
942/**
943 * Emits a CFA rule as register @a uReg + offset @a off.
944 */
945DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
946{
947 *Ptr.pb++ = DW_CFA_def_cfa;
948 Ptr = iemDwarfPutUleb128(Ptr, uReg);
949 Ptr = iemDwarfPutUleb128(Ptr, off);
950 return Ptr;
951}
952
953
954/**
955 * Emits a register (@a uReg) save location:
956 * CFA + @a off * data_alignment_factor
957 */
958DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
959{
960 if (uReg < 0x40)
961 *Ptr.pb++ = DW_CFA_offset | uReg;
962 else
963 {
964 *Ptr.pb++ = DW_CFA_offset_extended;
965 Ptr = iemDwarfPutUleb128(Ptr, uReg);
966 }
967 Ptr = iemDwarfPutUleb128(Ptr, off);
968 return Ptr;
969}
970
971
972# if 0 /* unused */
973/**
974 * Emits a register (@a uReg) save location, using signed offset:
975 * CFA + @a offSigned * data_alignment_factor
976 */
977DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
978{
979 *Ptr.pb++ = DW_CFA_offset_extended_sf;
980 Ptr = iemDwarfPutUleb128(Ptr, uReg);
981 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
982 return Ptr;
983}
984# endif
985
986
987/**
988 * Initializes the unwind info section for non-windows hosts.
989 */
990static int
991iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
992 void *pvChunk, uint32_t idxChunk)
993{
994 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
995 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
996
997 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
998
999 /*
1000 * Generate the CIE first.
1001 */
1002# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
1003 uint8_t const iDwarfVer = 3;
1004# else
1005 uint8_t const iDwarfVer = 4;
1006# endif
1007 RTPTRUNION const PtrCie = Ptr;
1008 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
1009 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
1010 *Ptr.pb++ = iDwarfVer; /* DWARF version */
1011 *Ptr.pb++ = 0; /* Augmentation. */
1012 if (iDwarfVer >= 4)
1013 {
1014 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
1015 *Ptr.pb++ = 0; /* Segment selector size. */
1016 }
1017# ifdef RT_ARCH_AMD64
1018 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
1019# else
1020 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
1021# endif
1022 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
1023# ifdef RT_ARCH_AMD64
1024 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
1025# elif defined(RT_ARCH_ARM64)
1026 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
1027# else
1028# error "port me"
1029# endif
1030 /* Initial instructions: */
1031# ifdef RT_ARCH_AMD64
1032 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
1033 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
1034 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
1035 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
1036 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
1037 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
1038 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
1039 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
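    /* With the data alignment factor of -8 set above, DW_CFA_offset reg,n means the register was saved at CFA - n*8; e.g. the return RIP is at CFA-8 and the saved RBP at CFA-16. */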
1040# elif defined(RT_ARCH_ARM64)
1041# if 1
1042 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
1043# else
1044 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
1045# endif
1046 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
1047 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
1048 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
1049 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
1050 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
1051 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
1052 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
1053 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
1054 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
1055 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
1056 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
1057 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
1058 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1059 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
1060# else
1061# error "port me"
1062# endif
1063 while ((Ptr.u - PtrCie.u) & 3)
1064 *Ptr.pb++ = DW_CFA_nop;
1065 /* Finalize the CIE size. */
1066 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
1067
1068 /*
1069 * Generate an FDE for the whole chunk area.
1070 */
1071# ifdef IEMNATIVE_USE_LIBUNWIND
1072 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
1073# endif
1074 RTPTRUNION const PtrFde = Ptr;
1075 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
1076 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
1077 Ptr.pu32++;
1078 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
1079 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
1080# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
1081 *Ptr.pb++ = DW_CFA_nop;
1082# endif
1083 while ((Ptr.u - PtrFde.u) & 3)
1084 *Ptr.pb++ = DW_CFA_nop;
1085 /* Finalize the FDE size. */
1086 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
1087
1088 /* Terminator entry. */
1089 *Ptr.pu32++ = 0;
1090 *Ptr.pu32++ = 0; /* just to be sure... */
1091 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
1092
1093 /*
1094 * Register it.
1095 */
1096# ifdef IEMNATIVE_USE_LIBUNWIND
1097 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
1098# else
1099 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1100 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1101# endif
1102
1103# ifdef IEMNATIVE_USE_GDB_JIT
1104 /*
1105 * Now for telling GDB about this (experimental).
1106 *
1107 * This seems to work best with ET_DYN.
1108 */
1109 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
1110 sizeof(GDBJITSYMFILE), NULL, NULL, NULL);
1111 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1112 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1113
1114 RT_ZERO(*pSymFile);
1115
1116 /*
1117 * The ELF header:
1118 */
1119 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1120 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1121 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1122 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1123 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1124 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1125 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1126 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1127# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1128 pSymFile->EHdr.e_type = ET_DYN;
1129# else
1130 pSymFile->EHdr.e_type = ET_REL;
1131# endif
1132# ifdef RT_ARCH_AMD64
1133 pSymFile->EHdr.e_machine = EM_AMD64;
1134# elif defined(RT_ARCH_ARM64)
1135 pSymFile->EHdr.e_machine = EM_AARCH64;
1136# else
1137# error "port me"
1138# endif
1139 pSymFile->EHdr.e_version = 1; /*?*/
1140 pSymFile->EHdr.e_entry = 0;
1141# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1142 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1143# else
1144 pSymFile->EHdr.e_phoff = 0;
1145# endif
1146 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1147 pSymFile->EHdr.e_flags = 0;
1148 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1149# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1150 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1151 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1152# else
1153 pSymFile->EHdr.e_phentsize = 0;
1154 pSymFile->EHdr.e_phnum = 0;
1155# endif
1156 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1157 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1158 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1159
1160 uint32_t offStrTab = 0;
1161#define APPEND_STR(a_szStr) do { \
1162 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1163 offStrTab += sizeof(a_szStr); \
1164 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1165 } while (0)
1166#define APPEND_STR_FMT(a_szStr, ...) do { \
1167 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1168 offStrTab++; \
1169 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1170 } while (0)
1171
1172 /*
1173 * Section headers.
1174 */
1175 /* Section header #0: NULL */
1176 unsigned i = 0;
1177 APPEND_STR("");
1178 RT_ZERO(pSymFile->aShdrs[i]);
1179 i++;
1180
1181 /* Section header: .eh_frame */
1182 pSymFile->aShdrs[i].sh_name = offStrTab;
1183 APPEND_STR(".eh_frame");
1184 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1185 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1186# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1187 pSymFile->aShdrs[i].sh_offset
1188 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1189# else
1190 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1191 pSymFile->aShdrs[i].sh_offset = 0;
1192# endif
1193
1194 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1195 pSymFile->aShdrs[i].sh_link = 0;
1196 pSymFile->aShdrs[i].sh_info = 0;
1197 pSymFile->aShdrs[i].sh_addralign = 1;
1198 pSymFile->aShdrs[i].sh_entsize = 0;
1199 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1200 i++;
1201
1202 /* Section header: .shstrtab */
1203 unsigned const iShStrTab = i;
1204 pSymFile->EHdr.e_shstrndx = iShStrTab;
1205 pSymFile->aShdrs[i].sh_name = offStrTab;
1206 APPEND_STR(".shstrtab");
1207 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1208 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1209# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1210 pSymFile->aShdrs[i].sh_offset
1211 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1212# else
1213 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1214 pSymFile->aShdrs[i].sh_offset = 0;
1215# endif
1216 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1217 pSymFile->aShdrs[i].sh_link = 0;
1218 pSymFile->aShdrs[i].sh_info = 0;
1219 pSymFile->aShdrs[i].sh_addralign = 1;
1220 pSymFile->aShdrs[i].sh_entsize = 0;
1221 i++;
1222
1223 /* Section header: .symtab */
1224 pSymFile->aShdrs[i].sh_name = offStrTab;
1225 APPEND_STR(".symtab");
1226 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1227 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1228 pSymFile->aShdrs[i].sh_offset
1229 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1230 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1231 pSymFile->aShdrs[i].sh_link = iShStrTab;
1232 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1233 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1234 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1235 i++;
1236
1237# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1238 /* Section header: .dynsym */
1239 pSymFile->aShdrs[i].sh_name = offStrTab;
1240 APPEND_STR(".dynsym");
1241 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1242 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1243 pSymFile->aShdrs[i].sh_offset
1244 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1245 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1246 pSymFile->aShdrs[i].sh_link = iShStrTab;
1247 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1248 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1249 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1250 i++;
1251# endif
1252
1253# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1254 /* Section header: .dynamic */
1255 pSymFile->aShdrs[i].sh_name = offStrTab;
1256 APPEND_STR(".dynamic");
1257 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1258 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1259 pSymFile->aShdrs[i].sh_offset
1260 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1261 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1262 pSymFile->aShdrs[i].sh_link = iShStrTab;
1263 pSymFile->aShdrs[i].sh_info = 0;
1264 pSymFile->aShdrs[i].sh_addralign = 1;
1265 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1266 i++;
1267# endif
1268
1269 /* Section header: .text */
1270 unsigned const iShText = i;
1271 pSymFile->aShdrs[i].sh_name = offStrTab;
1272 APPEND_STR(".text");
1273 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1274 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1275# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1276 pSymFile->aShdrs[i].sh_offset
1277 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1278# else
1279 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1280 pSymFile->aShdrs[i].sh_offset = 0;
1281# endif
1282 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1283 pSymFile->aShdrs[i].sh_link = 0;
1284 pSymFile->aShdrs[i].sh_info = 0;
1285 pSymFile->aShdrs[i].sh_addralign = 1;
1286 pSymFile->aShdrs[i].sh_entsize = 0;
1287 i++;
1288
1289 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1290
1291# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1292 /*
1293 * The program headers:
1294 */
1295 /* Everything in a single LOAD segment: */
1296 i = 0;
1297 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1298 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1299 pSymFile->aPhdrs[i].p_offset
1300 = pSymFile->aPhdrs[i].p_vaddr
1301 = pSymFile->aPhdrs[i].p_paddr = 0;
1302 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1303 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1304 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1305 i++;
1306 /* The .dynamic segment. */
1307 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1308 pSymFile->aPhdrs[i].p_flags = PF_R;
1309 pSymFile->aPhdrs[i].p_offset
1310 = pSymFile->aPhdrs[i].p_vaddr
1311 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1312 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1313 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1314 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1315 i++;
1316
1317 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1318
1319 /*
1320 * The dynamic section:
1321 */
1322 i = 0;
1323 pSymFile->aDyn[i].d_tag = DT_SONAME;
1324 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1325 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1326 i++;
1327 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1328 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1329 i++;
1330 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1331 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1332 i++;
1333 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1334 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1335 i++;
1336 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1337 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1338 i++;
1339 pSymFile->aDyn[i].d_tag = DT_NULL;
1340 i++;
1341 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1342# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1343
1344 /*
1345 * Symbol tables:
1346 */
1347 /** @todo gdb doesn't seem to really like this ... */
1348 i = 0;
1349 pSymFile->aSymbols[i].st_name = 0;
1350 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1351 pSymFile->aSymbols[i].st_value = 0;
1352 pSymFile->aSymbols[i].st_size = 0;
1353 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1354 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1355# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1356 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1357# endif
1358 i++;
1359
1360 pSymFile->aSymbols[i].st_name = 0;
1361 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1362 pSymFile->aSymbols[i].st_value = 0;
1363 pSymFile->aSymbols[i].st_size = 0;
1364 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1365 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1366 i++;
1367
1368 pSymFile->aSymbols[i].st_name = offStrTab;
1369 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1370# if 0
1371 pSymFile->aSymbols[i].st_shndx = iShText;
1372 pSymFile->aSymbols[i].st_value = 0;
1373# else
1374 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1375 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1376# endif
1377 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1378 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1379 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1380# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1381 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1382 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1383# endif
1384 i++;
1385
1386 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1387 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1388
1389 /*
1390 * The GDB JIT entry and informing GDB.
1391 */
1392 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1393# if 1
1394 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1395# else
1396 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1397# endif
1398
1399 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1400 RTCritSectEnter(&g_IemNativeGdbJitLock);
1401 pEhFrame->GdbJitEntry.pNext = NULL;
1402 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1403 if (__jit_debug_descriptor.pTail)
1404 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1405 else
1406 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1407 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1408 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1409
1410 /* Notify GDB: */
1411 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1412 __jit_debug_register_code();
1413 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1414 RTCritSectLeave(&g_IemNativeGdbJitLock);
1415
1416# else /* !IEMNATIVE_USE_GDB_JIT */
1417 RT_NOREF(pVCpu);
1418# endif /* !IEMNATIVE_USE_GDB_JIT */
1419
1420 return VINF_SUCCESS;
1421}
1422
1423# endif /* !RT_OS_WINDOWS */
1424#endif /* IN_RING3 */
1425
1426
1427/**
1428 * Adds another chunk to the executable memory allocator.
1429 *
1430 * This is used by the init code for the initial allocation and later by the
1431 * regular allocator function when it's out of memory.
1432 */
1433static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1434{
1435 /* Check that we've room for growth. */
1436 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1437 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1438
1439 /* Allocate a chunk. */
1440#ifdef RT_OS_DARWIN
1441 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1442#else
1443 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1444#endif
1445 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1446
1447#ifdef RT_OS_DARWIN
1448 /*
1449 * Because it is impossible to have an RWX memory allocation on macOS, try to remap the memory
1450 * chunk readable/executable somewhere else, so we can save ourselves the hassle of switching between
1451 * protections when executable memory is allocated.
1452 */
1453 int rc = VERR_NO_EXEC_MEMORY;
1454 mach_port_t hPortTask = mach_task_self();
1455 mach_vm_address_t AddrChunk = (mach_vm_address_t)pvChunk;
1456 mach_vm_address_t AddrRemapped = 0;
1457 vm_prot_t ProtCur = 0;
1458 vm_prot_t ProtMax = 0;
1459 kern_return_t krc = mach_vm_remap(hPortTask, &AddrRemapped, pExecMemAllocator->cbChunk, 0,
1460 VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
1461 hPortTask, AddrChunk, FALSE, &ProtCur, &ProtMax,
1462 VM_INHERIT_NONE);
1463 if (krc == KERN_SUCCESS)
1464 {
1465 krc = mach_vm_protect(mach_task_self(), AddrRemapped, pExecMemAllocator->cbChunk, FALSE, VM_PROT_READ | VM_PROT_EXECUTE);
1466 if (krc == KERN_SUCCESS)
1467 rc = VINF_SUCCESS;
1468 else
1469 {
1470 AssertLogRelMsgFailed(("mach_vm_protect -> %d (%#x)\n", krc, krc));
1471 krc = mach_vm_deallocate(hPortTask, AddrRemapped, pExecMemAllocator->cbChunk);
1472 Assert(krc == KERN_SUCCESS);
1473 }
1474 }
1475 else
1476 AssertLogRelMsgFailed(("mach_vm_remap -> %d (%#x)\n", krc, krc));
1477 if (RT_FAILURE(rc))
1478 {
1479 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1480 return rc;
1481 }
1482
1483 void *pvChunkRx = (void *)AddrRemapped;
1484#else
1485 int rc = VINF_SUCCESS;
1486 void *pvChunkRx = pvChunk;
1487#endif
1488
1489 /*
1490 * Add the chunk.
1491 *
1492 * This must be done before the unwind init so windows can allocate
1493 * memory from the chunk when using the alternative sub-allocator.
1494 */
1495 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = pvChunk;
1496 pExecMemAllocator->aChunks[idxChunk].pvChunkRx = pvChunkRx;
1497#ifdef IN_RING3
1498 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1499#endif
1500 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1501 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1502 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1503 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1504
1505 pExecMemAllocator->cChunks = idxChunk + 1;
1506 pExecMemAllocator->idxChunkHint = idxChunk;
1507
1508 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1509 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1510
1511 /* If there is a chunk context init callback, call it. */
1512 rc = iemNativeRecompileAttachExecMemChunkCtx(pVCpu, idxChunk, &pExecMemAllocator->aChunks[idxChunk].pCtx);
1513#ifdef IN_RING3
1514 /*
1515 * Initialize the unwind information (this cannot really fail atm).
1516 * (This sets pvUnwindInfo.)
1517 */
1518 if (RT_SUCCESS(rc))
1519 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunkRx, idxChunk);
1520#endif
1521 if (RT_SUCCESS(rc))
1522 { /* likely */ }
1523 else
1524 {
1525 /* Just in case the impossible happens, undo the above: */
1526 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1527 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cChunks = idxChunk;
1529 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1530 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1531 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = NULL;
1532 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1533
1534# ifdef RT_OS_DARWIN
1535 krc = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx,
1536 pExecMemAllocator->cbChunk);
1537 Assert(krc == KERN_SUCCESS);
1538# endif
1539
1540 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1541 return rc;
1542 }
1543
1544 return VINF_SUCCESS;
1545}
1546
1547
1548/**
1549 * Initializes the executable memory allocator for native recompilation on the
1550 * calling EMT.
1551 *
1552 * @returns VBox status code.
1553 * @param pVCpu The cross context virtual CPU structure of the calling
1554 * thread.
1555 * @param cbMax The max size of the allocator.
1556 * @param cbInitial The initial allocator size.
1557 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1558 * dependent).
1559 */
1560int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1561{
1562 /*
1563 * Validate input.
1564 */
1565 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1566 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1567    AssertLogRelMsgReturn(   cbChunk == UINT32_MAX
1568 || cbChunk == 0
1569 || ( RT_IS_POWER_OF_TWO(cbChunk)
1570 && cbChunk >= _1M
1571 && cbChunk <= _256M
1572 && cbChunk <= cbMax),
1573 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1574 VERR_OUT_OF_RANGE);
1575
1576 /*
1577 * Adjust/figure out the chunk size.
1578 */
1579 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1580 {
1581 if (cbMax >= _256M)
1582 cbChunk = _64M;
1583 else
1584 {
1585 if (cbMax < _16M)
1586 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1587 else
1588 cbChunk = (uint32_t)cbMax / 4;
1589 if (!RT_IS_POWER_OF_TWO(cbChunk))
1590 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1591 }
1592 }
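    /* Rough worked example (hypothetical values): cbMax = 40 MiB is below 256 MiB but not
       below 16 MiB, so cbChunk starts out as 40 MiB / 4 = 10 MiB; that is not a power of
       two, so it is rounded up to 16 MiB (assuming ASMBitLastSetU32 returns the 1-based
       index of the most significant set bit).  A cbMax of 512 MiB would use 64 MiB chunks. */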
1593#if defined(RT_ARCH_AMD64)
1594 Assert(cbChunk <= _2G);
1595#elif defined(RT_ARCH_ARM64)
1596 if (cbChunk > _128M)
1597 cbChunk = _128M; /* Max relative branch distance is +/-2^(25+2) = +/-0x8000000 (134 217 728). */
1598#endif
1599
1600 if (cbChunk > cbMax)
1601 cbMax = cbChunk;
1602 else
1603 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1604 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1605 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
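    /* Continuing the hypothetical 40 MiB example with 16 MiB chunks: cbMax is rounded up
       to 48 MiB and cMaxChunks becomes 3; the assertion above merely re-checks that cbMax
       is now an exact multiple of the chunk size. */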
1606
1607 /*
1608     * Allocate and initialize the allocator instance.
1609 */
1610 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
1611 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
1612 size_t cbNeeded = offBitmaps + cbBitmaps;
1613 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1614 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1615#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1616 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1617 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1618#endif
1619 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1620 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1621 VERR_NO_MEMORY);
1622 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1623 pExecMemAllocator->cbChunk = cbChunk;
1624 pExecMemAllocator->cMaxChunks = cMaxChunks;
1625 pExecMemAllocator->cChunks = 0;
1626 pExecMemAllocator->idxChunkHint = 0;
1627 pExecMemAllocator->cAllocations = 0;
1628 pExecMemAllocator->cbTotal = 0;
1629 pExecMemAllocator->cbFree = 0;
1630 pExecMemAllocator->cbAllocated = 0;
1631#ifdef VBOX_WITH_STATISTICS
1632 pExecMemAllocator->cbUnusable = 0;
1633#endif
1634 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1635 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1636 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
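    /* Illustration only, assuming a unit shift of 8 (256 byte allocation units): a 16 MiB
       chunk then holds 16 MiB >> 8 = 65536 units, tracked by 65536 / 64 = 1024 uint64_t
       bitmap elements per chunk (one bit per unit, 64 bits per element). */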
1637 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
1638#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1639 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1640#endif
1641 for (uint32_t i = 0; i < cMaxChunks; i++)
1642 {
1643 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1644 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1645 pExecMemAllocator->aChunks[i].pvChunkRw = NULL;
1646#ifdef IN_RING0
1647 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1648#else
1649 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1650#endif
1651 }
1652 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1653
1654 /*
1655 * Do the initial allocations.
1656 */
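    /* With the hypothetical 16 MiB chunk size from above, cbInitial = 16 MiB would mean a
       single iemExecMemAllocatorGrow call in the loop below, while cbInitial = 0 skips it. */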
1657    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1658 {
1659 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1660 AssertLogRelRCReturn(rc, rc);
1661 }
1662
1663 pExecMemAllocator->idxChunkHint = 0;
1664
1665 /*
1666 * Register statistics.
1667 */
1668 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1669    STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1670 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1671 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1672 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1673 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1674 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1675 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1676 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1677 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1678                     "Number of bytes currently allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1679 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1680                     "Number of bytes currently free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1681 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1682                     "Total number of bytes", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1683#ifdef VBOX_WITH_STATISTICS
1684 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbUnusable, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1685                     "Total number of unusable bytes", "/IEM/CPU%u/re/ExecMem/cbUnusable", pVCpu->idCpu);
1686 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1687 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1688#endif
1689#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
1690 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1691 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
1692 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
1693 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
1694#endif
1695
1696 return VINF_SUCCESS;
1697}
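
/*
 * Illustrative sketch only (hypothetical values, not taken from this file): a caller
 * on the EMT could set the allocator up with a 64 MiB cap, 16 MiB allocated up front
 * and the default chunk size roughly like this:
 *
 *     int rc = iemExecMemAllocatorInit(pVCpu, 64 * _1M, 16 * _1M, 0);
 *     AssertRCReturn(rc, rc);
 */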
1698