VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp@ 105284

Last change on this file since 105284 was 105282, checked in by vboxsync, 7 months ago

VMM/IEM: Build fix for the recompiler on linux.am64, bugref:10391

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 66.9 KB
1/* $Id: IEMAllN8veExecMem.cpp 105282 2024-07-11 20:25:22Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/mem.h>
59#include <iprt/string.h>
60#if defined(RT_ARCH_AMD64)
61# include <iprt/x86.h>
62#elif defined(RT_ARCH_ARM64)
63# include <iprt/armv8.h>
64#endif
65
66#ifdef RT_OS_WINDOWS
67# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
68extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
70#else
71# include <iprt/formats/dwarf.h>
72# if defined(RT_OS_DARWIN)
73# include <libkern/OSCacheControl.h>
74# include <mach/mach.h>
75# include <mach/mach_vm.h>
76# define IEMNATIVE_USE_LIBUNWIND
77extern "C" void __register_frame(const void *pvFde);
78extern "C" void __deregister_frame(const void *pvFde);
79# else
80# ifdef DEBUG_bird /** @todo not thread safe yet */
81# define IEMNATIVE_USE_GDB_JIT
82# endif
83# ifdef IEMNATIVE_USE_GDB_JIT
84# include <iprt/critsect.h>
85# include <iprt/once.h>
86# include <iprt/formats/elf64.h>
87# endif
88extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
89extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
90# endif
91#endif
92
93#include "IEMN8veRecompiler.h"
94
95
96/*********************************************************************************************************************************
97* Executable Memory Allocator *
98*********************************************************************************************************************************/
99/** The chunk sub-allocation unit size in bytes. */
100#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 256
101/** The chunk sub-allocation unit size as a shift factor. */
102#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 8
103/** Enables adding a header to the sub-allocator allocations.
104 * This is useful for freeing up executable memory among other things. */
105#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
106/** Use alternative pruning. */
107#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
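/* Note (illustrative): the shift above is simply log2 of the unit size
   (256 == 1 << 8), so every sub-allocation is rounded up to a multiple of
   256 bytes and tracked by one bit per unit in the per-chunk allocation
   bitmap (IEMEXECMEMALLOCATOR::pbmAlloc below). */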
108
109
110#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
111# ifdef IEMNATIVE_USE_GDB_JIT
112# define IEMNATIVE_USE_GDB_JIT_ET_DYN
113
114/** GDB JIT: Code entry. */
115typedef struct GDBJITCODEENTRY
116{
117 struct GDBJITCODEENTRY *pNext;
118 struct GDBJITCODEENTRY *pPrev;
119 uint8_t *pbSymFile;
120 uint64_t cbSymFile;
121} GDBJITCODEENTRY;
122
123/** GDB JIT: Actions. */
124typedef enum GDBJITACTIONS : uint32_t
125{
126 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
127} GDBJITACTIONS;
128
129/** GDB JIT: Descriptor. */
130typedef struct GDBJITDESCRIPTOR
131{
132 uint32_t uVersion;
133 GDBJITACTIONS enmAction;
134 GDBJITCODEENTRY *pRelevant;
135 GDBJITCODEENTRY *pHead;
136 /** Our addition: */
137 GDBJITCODEENTRY *pTail;
138} GDBJITDESCRIPTOR;
139
140/** GDB JIT: Our simple symbol file data. */
141typedef struct GDBJITSYMFILE
142{
143 Elf64_Ehdr EHdr;
144# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
145 Elf64_Shdr aShdrs[5];
146# else
147 Elf64_Shdr aShdrs[7];
148 Elf64_Phdr aPhdrs[2];
149# endif
150 /** The dwarf ehframe data for the chunk. */
151 uint8_t abEhFrame[512];
152 char szzStrTab[128];
153 Elf64_Sym aSymbols[3];
154# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
155 Elf64_Sym aDynSyms[2];
156 Elf64_Dyn aDyn[6];
157# endif
158} GDBJITSYMFILE;
159
160extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
161extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
162
163/** Init once for g_IemNativeGdbJitLock. */
164static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
165/** Init once for the critical section. */
166static RTCRITSECT g_IemNativeGdbJitLock;
167
168/** GDB reads the info here. */
169GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
170
171/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
172DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
173{
174 ASMNopPause();
175}
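/* (Illustrative note on the protocol: the registration code further down sets
   __jit_debug_descriptor.enmAction/.pRelevant and then calls this stub, so the
   breakpoint GDB keeps on it fires and the debugger re-reads the descriptor to
   pick up the new in-memory symbol file.) */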
176
177/** @callback_method_impl{FNRTONCE} */
178static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
179{
180 RT_NOREF(pvUser);
181 return RTCritSectInit(&g_IemNativeGdbJitLock);
182}
183
184
185# endif /* IEMNATIVE_USE_GDB_JIT */
186
187/**
188 * Per-chunk unwind info for non-windows hosts.
189 */
190typedef struct IEMEXECMEMCHUNKEHFRAME
191{
192# ifdef IEMNATIVE_USE_LIBUNWIND
193 /** The offset of the FDA into abEhFrame. */
194 uintptr_t offFda;
195# else
196 /** 'struct object' storage area. */
197 uint8_t abObject[1024];
198# endif
199# ifdef IEMNATIVE_USE_GDB_JIT
200# if 0
201 /** The GDB JIT 'symbol file' data. */
202 GDBJITSYMFILE GdbJitSymFile;
203# endif
204 /** The GDB JIT list entry. */
205 GDBJITCODEENTRY GdbJitEntry;
206# endif
207 /** The dwarf ehframe data for the chunk. */
208 uint8_t abEhFrame[512];
209} IEMEXECMEMCHUNKEHFRAME;
210/** Pointer to per-chunk unwind info for non-windows hosts. */
211typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
212#endif
213
214
215/**
216 * A chunk of executable memory.
217 */
218typedef struct IEMEXECMEMCHUNK
219{
220 /** Number of free items in this chunk. */
221 uint32_t cFreeUnits;
222 /** Hint where to start searching for free space in the allocation bitmap. */
223 uint32_t idxFreeHint;
224 /** Pointer to the readable/writeable view of the memory chunk. */
225 void *pvChunkRw;
226 /** Pointer to the readable/executable view of the memory chunk. */
227 void *pvChunkRx;
228#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
229 /** Pointer to the context structure detailing the per chunk common code. */
230 PCIEMNATIVEPERCHUNKCTX pCtx;
231#endif
232#ifdef IN_RING3
233 /**
234 * Pointer to the unwind information.
235 *
236 * This is used during C++ throw and longjmp (windows and probably most other
237 * platforms). Some debuggers (windbg) make use of it as well.
238 *
239 * Windows: This is allocated from hHeap on windows because (at least for
240 * AMD64) the UNWIND_INFO structure address in the
241 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
242 *
243 * Others: Allocated from the regular heap to avoid unnecessary executable data
244 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
245 void *pvUnwindInfo;
246#elif defined(IN_RING0)
247 /** Allocation handle. */
248 RTR0MEMOBJ hMemObj;
249#endif
250} IEMEXECMEMCHUNK;
251/** Pointer to a memory chunk. */
252typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
253
254
255/**
256 * Executable memory allocator for the native recompiler.
257 */
258typedef struct IEMEXECMEMALLOCATOR
259{
260 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
261 uint32_t uMagic;
262
263 /** The chunk size. */
264 uint32_t cbChunk;
265 /** The maximum number of chunks. */
266 uint32_t cMaxChunks;
267 /** The current number of chunks. */
268 uint32_t cChunks;
269 /** Hint where to start looking for available memory. */
270 uint32_t idxChunkHint;
271 /** Statistics: Current number of allocations. */
272 uint32_t cAllocations;
273
274 /** The total amount of memory available. */
275 uint64_t cbTotal;
276 /** Total amount of free memory. */
277 uint64_t cbFree;
278 /** Total amount of memory allocated. */
279 uint64_t cbAllocated;
280
281 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
282 *
283 * Since the chunk size is a power of two and the minimum chunk size is a lot
284 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
285 * require a whole number of uint64_t elements in the allocation bitmap. So,
286 * for the sake of simplicity/laziness, they are allocated as one continuous
287 * block. */
288 uint64_t *pbmAlloc;
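    /* Sizing example (illustrative, using the default 64 MB chunk size picked by
       iemExecMemAllocatorInit for large cbMax values): 64 MB divided into 256 byte
       units gives 262144 units, i.e. 4096 uint64_t bitmap elements (32 KB of
       bitmap) per chunk. */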
289 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
290 uint32_t cUnitsPerChunk;
291 /** Number of bitmap elements per chunk (for quickly locating the bitmap
292 * portion corresponding to a chunk). */
293 uint32_t cBitmapElementsPerChunk;
294
295#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
296 /** The next chunk to prune in. */
297 uint32_t idxChunkPrune;
298 /** The offset within the chunk to start pruning at. */
299 uint32_t offChunkPrune;
300 /** Profiling the pruning code. */
301 STAMPROFILE StatPruneProf;
302 /** Number of bytes recovered by the pruning. */
303 STAMPROFILE StatPruneRecovered;
304#endif
305
306#ifdef VBOX_WITH_STATISTICS
307 STAMPROFILE StatAlloc;
308#endif
309
310
311#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
312 /** Pointer to the array of unwind info running parallel to aChunks (same
313 * allocation as this structure, located after the bitmaps).
314 * (For Windows, the structures must reside in 32-bit RVA distance to the
315 * actual chunk, so they are allocated off the chunk.) */
316 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
317#endif
318
319 /** The allocation chunks. */
320 RT_FLEXIBLE_ARRAY_EXTENSION
321 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
322} IEMEXECMEMALLOCATOR;
323/** Pointer to an executable memory allocator. */
324typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
325
326/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
327#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
328
329
330#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
331/**
332 * Allocation header.
333 */
334typedef struct IEMEXECMEMALLOCHDR
335{
336 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
337 uint32_t uMagic;
338 /** The allocation chunk (for speeding up freeing). */
339 uint32_t idxChunk;
340 /** Pointer to the translation block the allocation belongs to.
341 * This is the whole point of the header. */
342 PIEMTB pTb;
343} IEMEXECMEMALLOCHDR;
344/** Pointer to an allocation header. */
345typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
346/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
347# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
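/* Layout note (illustrative, assuming a 64-bit host): this header is 16 bytes
   (two uint32_t fields plus a pointer), i.e. it consumes 1/16th of the first
   256 byte unit of every translation-block allocation; the caller receives the
   address right after it (see iemExecMemAllocatorAllocInChunkInt returning
   pHdr + 1 below). */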
348#endif
349
350
351static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
352
353
354#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
355/**
356 * Frees up executable memory when we're out of space.
357 *
358 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
359 * space in a more linear fashion from the allocator's point of view. It may
360 * also defragment if implemented & enabled.
361 */
362static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
363{
364# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
365# error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
366# endif
367 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
368
369 /*
370 * Before we can start, we must process delayed frees.
371 */
372 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
373
374 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
375
376 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
377 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
378 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
379
380 uint32_t const cChunks = pExecMemAllocator->cChunks;
381 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
382 AssertReturnVoid(cChunks >= 1);
383
384 Assert(!pVCpu->iem.s.pCurTbR3);
385
386 /*
387 * Decide how much to prune. The chunk size is a multiple of two, so we'll be
388 * scanning a multiple of two here as well.
389 */
390 uint32_t cbToPrune = cbChunk;
391
392 /* Never more than 25%. */
393 if (cChunks < 4)
394 cbToPrune /= cChunks == 1 ? 4 : 2;
395
396 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
397 if (cbToPrune > _4M)
398 cbToPrune = _4M;
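    /* Illustrative: with a single 64 MB chunk the 25% rule above would give 16 MB,
       which the cap reduces to 4 MB, i.e. at most 16384 sub-allocation units are
       scanned per pruning call. */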
399
400 /*
401 * Adjust the pruning chunk and offset accordingly.
402 */
403 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
404 uint32_t offChunk = pExecMemAllocator->offChunkPrune;
405 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
406 if (offChunk >= cbChunk)
407 {
408 offChunk = 0;
409 idxChunk += 1;
410 }
411 if (idxChunk >= cChunks)
412 {
413 offChunk = 0;
414 idxChunk = 0;
415 }
416
417 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
418
419 /*
420 * Do the pruning. The current approach is the severe kind.
421 */
422 uint64_t cbPruned = 0;
423 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
424 while (offChunk < offPruneEnd)
425 {
426 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
427
428 /* Is this the start of an allocation block for a TB? (We typically have
429 one allocation at the start of each chunk for the unwind info where
430 pTb is NULL.) */
431 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC
432 && pHdr->pTb != NULL
433 && pHdr->idxChunk == idxChunk)
434 {
435 PIEMTB const pTb = pHdr->pTb;
436 AssertPtr(pTb);
437
438 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
439 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
440 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
441
442 iemTbAllocatorFree(pVCpu, pTb);
443
444 cbPruned += cbBlock;
445 offChunk += cbBlock;
446 }
447 else
448 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
449 }
450 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
451
452 /*
453 * Save the current pruning point.
454 */
455 pExecMemAllocator->offChunkPrune = offChunk;
456 pExecMemAllocator->idxChunkPrune = idxChunk;
457
458 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
459}
460#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
461
462
463/**
464 * Try to allocate a block of @a cReqUnits in the chunk @a idxChunk.
465 */
466static void *
467iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
468 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb,
469 void **ppvExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
470{
471 /*
472 * Advance the bitmap pointer to the idxFirst bit so we can use ASMBitFirstClear.
473 */
474 Assert(!(cToScan & 63));
475 Assert(!(idxFirst & 63));
476 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
477 pbmAlloc += idxFirst / 64;
478
479 /*
480 * Scan the bitmap for cReqUnits consecutive clear bits.
481 */
482 /** @todo This can probably be done more efficiently for non-x86 systems. */
483 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
484 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
485 {
486 uint32_t idxAddBit = 1;
487 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
488 idxAddBit++;
489 if (idxAddBit >= cReqUnits)
490 {
491 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
492
493 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
494 pChunk->cFreeUnits -= cReqUnits;
495 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
496
497 pExecMemAllocator->cAllocations += 1;
498 uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
499 pExecMemAllocator->cbAllocated += cbReq;
500 pExecMemAllocator->cbFree -= cbReq;
501 pExecMemAllocator->idxChunkHint = idxChunk;
502
503 void * const pvMemRw = (uint8_t *)pChunk->pvChunkRw
504 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
505
506 if (ppChunkCtx)
507 *ppChunkCtx = pChunk->pCtx;
508
509 /*
510 * Initialize the header and return.
511 */
512# ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
513 PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMemRw;
514 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
515 pHdr->idxChunk = idxChunk;
516 pHdr->pTb = pTb;
517
518 if (ppvExec)
519 *ppvExec = (uint8_t *)pChunk->pvChunkRx
520 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT)
521 + sizeof(*pHdr);
522
523 return pHdr + 1;
524#else
525 if (ppvExec)
526 *ppvExec = (uint8_t *)pChunk->pvChunkRx
527 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
528
529 RT_NOREF(pTb);
530 return pvMemRw;
531#endif
532 }
533
534 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
535 }
536 return NULL;
537}
538
539
540static PIEMNATIVEINSTR
541iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb,
542 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
543{
544 /*
545 * Figure out how much to allocate.
546 */
547#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
548 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
549#else
550 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
551#endif
552 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
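    /* Example (illustrative, 64-bit host, allocation header enabled): cbReq = 1000
       gives (1000 + 16 + 255) >> 8 = 4 units, i.e. the block occupies 1024 bytes
       including the 16 byte IEMEXECMEMALLOCHDR. */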
553 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
554 {
555 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
556 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
557 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
558 {
559 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
560 pExecMemAllocator->cUnitsPerChunk - idxHint,
561 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
562 if (pvRet)
563 return (PIEMNATIVEINSTR)pvRet;
564 }
565 return (PIEMNATIVEINSTR)iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
566 RT_MIN(pExecMemAllocator->cUnitsPerChunk,
567 RT_ALIGN_32(idxHint + cReqUnits, 64)),
568 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
569 }
570 return NULL;
571}
572
573
574/**
575 * Allocates @a cbReq bytes of executable memory.
576 *
577 * @returns Pointer to the readable/writeable memory, NULL if out of memory or
578 * another problem was encountered.
579 * @param pVCpu The cross context virtual CPU structure of the
580 * calling thread.
581 * @param cbReq How many bytes are required.
582 * @param pTb The translation block that will be using the allocation.
583 * @param ppaExec Where to return the pointer to executable view of
584 * the allocated memory, optional.
585 * @param ppChunkCtx Where to return the per chunk attached context
586 * if available, optional.
587 */
588DECLHIDDEN(PIEMNATIVEINSTR) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb,
589 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx) RT_NOEXCEPT
590{
591 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
592 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
593 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
594 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
595
596 for (unsigned iIteration = 0;; iIteration++)
597 {
598 if (cbReq <= pExecMemAllocator->cbFree)
599 {
600 uint32_t const cChunks = pExecMemAllocator->cChunks;
601 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
602 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
603 {
604 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
605 ppaExec, ppChunkCtx);
606 if (pRet)
607 {
608 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
609 return pRet;
610 }
611 }
612 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
613 {
614 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
615 ppaExec, ppChunkCtx);
616 if (pRet)
617 {
618 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
619 return pRet;
620 }
621 }
622 }
623
624 /*
625 * Can we grow it with another chunk?
626 */
627 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
628 {
629 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
630 AssertLogRelRCReturn(rc, NULL);
631
632 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
633 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
634 ppaExec, ppChunkCtx);
635 if (pRet)
636 {
637 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
638 return pRet;
639 }
640 AssertFailed();
641 }
642
643 /*
644 * Try prune native TBs once.
645 */
646 if (iIteration == 0)
647 {
648#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
649 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
650#else
651 /* No header included in the instruction count here. */
652 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
653 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
654#endif
655 }
656 else
657 {
658 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
659 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
660 return NULL;
661 }
662 }
663}
664
665
666/** This is a hook to ensure the instruction cache is properly flushed before the code in the memory
667 * given by @a pv and @a cb is executed. */
668DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
669{
670#ifdef RT_OS_DARWIN
671 /*
672 * Flush the instruction cache:
673 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
674 */
675 /* sys_dcache_flush(pv, cb); - not necessary */
676 sys_icache_invalidate(pv, cb);
677 RT_NOREF(pVCpu);
678
679#elif defined(RT_OS_LINUX) && defined(RT_ARCH_ARM64)
680 RT_NOREF(pVCpu);
681
682 /* There is __builtin___clear_cache() but it flushes both the instruction and data cache, so do it manually. */
683 static uint32_t s_u32CtrEl0 = 0;
684 if (!s_u32CtrEl0)
685 asm volatile ("mrs %0, ctr_el0":"=r" (s_u32CtrEl0));
686 uintptr_t cbICacheLine = (uintptr_t)4 << (s_u32CtrEl0 & 0xf);
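    /* CTR_EL0.IminLine (bits 3:0) is the log2 of the smallest I-cache line size in
       4-byte words, so e.g. a field value of 4 gives 4 << 4 = 64 byte lines, which
       is what the loop below steps by (illustrative; 64 bytes is typical). */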
687
688 uintptr_t pb = (uintptr_t)pv & ~(cbICacheLine - 1);
689 for (; pb < (uintptr_t)pv + cb; pb += cbICacheLine)
690 asm volatile ("ic ivau, %0" : : "r" (pb) : "memory");
691
692 asm volatile ("dsb ish\n\t isb\n\t" : : : "memory");
693
694#else
695 RT_NOREF(pVCpu, pv, cb);
696#endif
697}
698
699
700/**
701 * Frees executable memory.
702 */
703DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
704{
705 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
706 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
707 AssertPtr(pv);
708#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
709 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
710
711 /* Align the size as we did when allocating the block. */
712 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
713
714#else
715 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
716 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
717 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
718 uint32_t const idxChunk = pHdr->idxChunk;
719 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
720 pv = pHdr;
721
722 /* Adjust and align the size to cover the whole allocation area. */
723 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
724#endif
725
726 /* Free it / assert sanity. */
727 bool fFound = false;
728 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
729#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
730 uint32_t const cChunks = pExecMemAllocator->cChunks;
731 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
732#endif
733 {
734 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
735 fFound = offChunk < cbChunk;
736 if (fFound)
737 {
738 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
739 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
740
741 /* Check that it's valid and free it. */
742 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
743 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
744 for (uint32_t i = 1; i < cReqUnits; i++)
745 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
746 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
747
748 /* Invalidate the header using the writeable memory view. */
749 pHdr = (PIEMEXECMEMALLOCHDR)((uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRw + offChunk);
750#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
751 pHdr->uMagic = 0;
752 pHdr->idxChunk = 0;
753 pHdr->pTb = NULL;
754#endif
755 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
756 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
757
758 /* Update the stats. */
759 pExecMemAllocator->cbAllocated -= cb;
760 pExecMemAllocator->cbFree += cb;
761 pExecMemAllocator->cAllocations -= 1;
762 return;
763 }
764 }
765 AssertFailed();
766}
767
768
769/**
770 * Interface used by iemNativeRecompileAttachExecMemChunkCtx and unwind info
771 * generators.
772 */
773DECLHIDDEN(PIEMNATIVEINSTR)
774iemExecMemAllocatorAllocFromChunk(PVMCPU pVCpu, uint32_t idxChunk, uint32_t cbReq, PIEMNATIVEINSTR *ppaExec)
775{
776 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
777 AssertReturn(idxChunk < pExecMemAllocator->cChunks, NULL);
778 Assert(cbReq < _1M);
779 return iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, NULL /*pTb*/, ppaExec, NULL /*ppChunkCtx*/);
780}
781
782
783#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
784/**
785 * For getting the per-chunk context detailing common code for a TB.
786 *
787 * This is for use by the disassembler.
788 */
789DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemExecMemGetTbChunkCtx(PVMCPU pVCpu, PCIEMTB pTb)
790{
791 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
792 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
793 {
794 uintptr_t const uAddress = (uintptr_t)pTb->Native.paInstructions;
795 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
796 uint32_t idxChunk = pExecMemAllocator->cChunks;
797 while (idxChunk-- > 0)
798 if (uAddress - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx < cbChunk)
799 return pExecMemAllocator->aChunks[idxChunk].pCtx;
800 }
801 return NULL;
802}
803#endif
804
805
806#ifdef IN_RING3
807# ifdef RT_OS_WINDOWS
808
809/**
810 * Initializes the unwind info structures for windows hosts.
811 */
812static int
813iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
814 void *pvChunk, uint32_t idxChunk)
815{
816 RT_NOREF(pVCpu);
817
818 /*
819 * The AMD64 unwind opcodes.
820 *
821 * This is a program that starts with RSP after a RET instruction that
822 * ends up in recompiled code, and the operations we describe here will
823 * restore all non-volatile registers and bring RSP back to where our
824 * RET address is. This means it's reverse order from what happens in
825 * the prologue.
826 *
827 * Note! Using a frame register approach here both because we have one,
828 * but mainly because the UWOP_ALLOC_LARGE argument values
829 * would be a pain to write initializers for. On the positive
830 * side, we're impervious to changes in the stack variable
831 * area and can deal with dynamic stack allocations if necessary.
832 */
833 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
834 {
835 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
836 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
837 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
838 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
839 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
840 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
841 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
842 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
843 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
844 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
845 };
846 union
847 {
848 IMAGE_UNWIND_INFO Info;
849 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
850 } s_UnwindInfo =
851 {
852 {
853 /* .Version = */ 1,
854 /* .Flags = */ 0,
855 /* .SizeOfProlog = */ 16, /* whatever */
856 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
857 /* .FrameRegister = */ X86_GREG_xBP,
858 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
859 }
860 };
861 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
862 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
863
864 /*
865 * Calc how much space we need and allocate it off the exec heap.
866 */
867 unsigned const cFunctionEntries = 1;
868 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
869 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
870 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
871 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL, NULL, NULL);
872 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
874
875 /*
876 * Initialize the structures.
877 */
878 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
879
880 paFunctions[0].BeginAddress = 0;
881 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
882 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
883
884 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
885 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
886
887 /*
888 * Register it.
889 */
890 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
891 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
892
893 return VINF_SUCCESS;
894}
895
896
897# else /* !RT_OS_WINDOWS */
898
899/**
900 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
901 */
902DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
903{
904 if (iValue >= 64)
905 {
906 Assert(iValue < 0x2000);
907 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
908 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
909 }
910 else if (iValue >= 0)
911 *Ptr.pb++ = (uint8_t)iValue;
912 else if (iValue > -64)
913 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
914 else
915 {
916 Assert(iValue > -0x2000);
917 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
918 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
919 }
920 return Ptr;
921}
922
923
924/**
925 * Emits an ULEB128 encoded value (up to 64-bit wide).
926 */
927DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
928{
929 while (uValue >= 0x80)
930 {
931 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
932 uValue >>= 7;
933 }
934 *Ptr.pb++ = (uint8_t)uValue;
935 return Ptr;
936}
937
938
939/**
940 * Emits a CFA rule as register @a uReg + offset @a off.
941 */
942DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
943{
944 *Ptr.pb++ = DW_CFA_def_cfa;
945 Ptr = iemDwarfPutUleb128(Ptr, uReg);
946 Ptr = iemDwarfPutUleb128(Ptr, off);
947 return Ptr;
948}
949
950
951/**
952 * Emits a register (@a uReg) save location:
953 * CFA + @a off * data_alignment_factor
954 */
955DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
956{
957 if (uReg < 0x40)
958 *Ptr.pb++ = DW_CFA_offset | uReg;
959 else
960 {
961 *Ptr.pb++ = DW_CFA_offset_extended;
962 Ptr = iemDwarfPutUleb128(Ptr, uReg);
963 }
964 Ptr = iemDwarfPutUleb128(Ptr, off);
965 return Ptr;
966}
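/* Example (illustrative): iemDwarfPutCfaOffset(Ptr, 6, 2) emits the two bytes
   0x86 0x02 (DW_CFA_offset | 6, ULEB128 2), telling the unwinder that DWARF
   register 6 was saved at CFA + 2 * data_alignment_factor, i.e. CFA - 16 with
   the -8 factor used in the CIE below. */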
967
968
969# if 0 /* unused */
970/**
971 * Emits a register (@a uReg) save location, using signed offset:
972 * CFA + @a offSigned * data_alignment_factor
973 */
974DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
975{
976 *Ptr.pb++ = DW_CFA_offset_extended_sf;
977 Ptr = iemDwarfPutUleb128(Ptr, uReg);
978 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
979 return Ptr;
980}
981# endif
982
983
984/**
985 * Initializes the unwind info section for non-windows hosts.
986 */
987static int
988iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
989 void *pvChunk, uint32_t idxChunk)
990{
991 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
992 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
993
994 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
995
996 /*
997 * Generate the CIE first.
998 */
999# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
1000 uint8_t const iDwarfVer = 3;
1001# else
1002 uint8_t const iDwarfVer = 4;
1003# endif
1004 RTPTRUNION const PtrCie = Ptr;
1005 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
1006 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
1007 *Ptr.pb++ = iDwarfVer; /* DWARF version */
1008 *Ptr.pb++ = 0; /* Augmentation. */
1009 if (iDwarfVer >= 4)
1010 {
1011 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
1012 *Ptr.pb++ = 0; /* Segment selector size. */
1013 }
1014# ifdef RT_ARCH_AMD64
1015 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
1016# else
1017 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
1018# endif
1019 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
1020# ifdef RT_ARCH_AMD64
1021 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
1022# elif defined(RT_ARCH_ARM64)
1023 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
1024# else
1025# error "port me"
1026# endif
1027 /* Initial instructions: */
1028# ifdef RT_ARCH_AMD64
1029 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
1030 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
1031 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
1032 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
1033 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
1034 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
1035 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
1036 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
1037# elif defined(RT_ARCH_ARM64)
1038# if 1
1039 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
1040# else
1041 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
1042# endif
1043 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
1044 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
1045 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
1046 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
1047 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
1048 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
1049 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
1050 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
1051 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
1052 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
1053 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
1054 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
1055 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1056 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
1057# else
1058# error "port me"
1059# endif
1060 while ((Ptr.u - PtrCie.u) & 3)
1061 *Ptr.pb++ = DW_CFA_nop;
1062 /* Finalize the CIE size. */
1063 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
1064
1065 /*
1066 * Generate an FDE for the whole chunk area.
1067 */
1068# ifdef IEMNATIVE_USE_LIBUNWIND
1069 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
1070# endif
1071 RTPTRUNION const PtrFde = Ptr;
1072 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
1073 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
1074 Ptr.pu32++;
1075 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
1076 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
1077# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
1078 *Ptr.pb++ = DW_CFA_nop;
1079# endif
1080 while ((Ptr.u - PtrFde.u) & 3)
1081 *Ptr.pb++ = DW_CFA_nop;
1082 /* Finalize the FDE size. */
1083 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
1084
1085 /* Terminator entry. */
1086 *Ptr.pu32++ = 0;
1087 *Ptr.pu32++ = 0; /* just to be sure... */
1088 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
1089
1090 /*
1091 * Register it.
1092 */
1093# ifdef IEMNATIVE_USE_LIBUNWIND
1094 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
1095# else
1096 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1097 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1098# endif
1099
1100# ifdef IEMNATIVE_USE_GDB_JIT
1101 /*
1102 * Now for telling GDB about this (experimental).
1103 *
1104 * This seems to work best with ET_DYN.
1105 */
1106 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
1107 sizeof(GDBJITSYMFILE), NULL, NULL, NULL);
1108 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1109 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1110
1111 RT_ZERO(*pSymFile);
1112
1113 /*
1114 * The ELF header:
1115 */
1116 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1117 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1118 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1119 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1120 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1121 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1122 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1123 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1124# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1125 pSymFile->EHdr.e_type = ET_DYN;
1126# else
1127 pSymFile->EHdr.e_type = ET_REL;
1128# endif
1129# ifdef RT_ARCH_AMD64
1130 pSymFile->EHdr.e_machine = EM_AMD64;
1131# elif defined(RT_ARCH_ARM64)
1132 pSymFile->EHdr.e_machine = EM_AARCH64;
1133# else
1134# error "port me"
1135# endif
1136 pSymFile->EHdr.e_version = 1; /*?*/
1137 pSymFile->EHdr.e_entry = 0;
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1140# else
1141 pSymFile->EHdr.e_phoff = 0;
1142# endif
1143 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1144 pSymFile->EHdr.e_flags = 0;
1145 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1146# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1147 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1148 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1149# else
1150 pSymFile->EHdr.e_phentsize = 0;
1151 pSymFile->EHdr.e_phnum = 0;
1152# endif
1153 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1154 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1155 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1156
1157 uint32_t offStrTab = 0;
1158#define APPEND_STR(a_szStr) do { \
1159 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1160 offStrTab += sizeof(a_szStr); \
1161 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1162 } while (0)
1163#define APPEND_STR_FMT(a_szStr, ...) do { \
1164 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1165 offStrTab++; \
1166 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1167 } while (0)
1168
1169 /*
1170 * Section headers.
1171 */
1172 /* Section header #0: NULL */
1173 unsigned i = 0;
1174 APPEND_STR("");
1175 RT_ZERO(pSymFile->aShdrs[i]);
1176 i++;
1177
1178 /* Section header: .eh_frame */
1179 pSymFile->aShdrs[i].sh_name = offStrTab;
1180 APPEND_STR(".eh_frame");
1181 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1182 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1183# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1184 pSymFile->aShdrs[i].sh_offset
1185 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1186# else
1187 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1188 pSymFile->aShdrs[i].sh_offset = 0;
1189# endif
1190
1191 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1192 pSymFile->aShdrs[i].sh_link = 0;
1193 pSymFile->aShdrs[i].sh_info = 0;
1194 pSymFile->aShdrs[i].sh_addralign = 1;
1195 pSymFile->aShdrs[i].sh_entsize = 0;
1196 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1197 i++;
1198
1199 /* Section header: .shstrtab */
1200 unsigned const iShStrTab = i;
1201 pSymFile->EHdr.e_shstrndx = iShStrTab;
1202 pSymFile->aShdrs[i].sh_name = offStrTab;
1203 APPEND_STR(".shstrtab");
1204 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1205 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1206# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1207 pSymFile->aShdrs[i].sh_offset
1208 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1209# else
1210 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1211 pSymFile->aShdrs[i].sh_offset = 0;
1212# endif
1213 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1214 pSymFile->aShdrs[i].sh_link = 0;
1215 pSymFile->aShdrs[i].sh_info = 0;
1216 pSymFile->aShdrs[i].sh_addralign = 1;
1217 pSymFile->aShdrs[i].sh_entsize = 0;
1218 i++;
1219
1220 /* Section header: .symtab */
1221 pSymFile->aShdrs[i].sh_name = offStrTab;
1222 APPEND_STR(".symtab");
1223 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1224 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1225 pSymFile->aShdrs[i].sh_offset
1226 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1227 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1228 pSymFile->aShdrs[i].sh_link = iShStrTab;
1229 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1230 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1231 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1232 i++;
1233
1234# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1235 /* Section header: .dynsym */
1236 pSymFile->aShdrs[i].sh_name = offStrTab;
1237 APPEND_STR(".dynsym");
1238 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1239 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1240 pSymFile->aShdrs[i].sh_offset
1241 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1242 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1243 pSymFile->aShdrs[i].sh_link = iShStrTab;
1244 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1245 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1246 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1247 i++;
1248# endif
1249
1250# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1251 /* Section header: .dynamic */
1252 pSymFile->aShdrs[i].sh_name = offStrTab;
1253 APPEND_STR(".dynamic");
1254 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1255 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1256 pSymFile->aShdrs[i].sh_offset
1257 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1258 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1259 pSymFile->aShdrs[i].sh_link = iShStrTab;
1260 pSymFile->aShdrs[i].sh_info = 0;
1261 pSymFile->aShdrs[i].sh_addralign = 1;
1262 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1263 i++;
1264# endif
1265
1266 /* Section header: .text */
1267 unsigned const iShText = i;
1268 pSymFile->aShdrs[i].sh_name = offStrTab;
1269 APPEND_STR(".text");
1270 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1271 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1272# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1273 pSymFile->aShdrs[i].sh_offset
1274 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1275# else
1276 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1277 pSymFile->aShdrs[i].sh_offset = 0;
1278# endif
1279 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1280 pSymFile->aShdrs[i].sh_link = 0;
1281 pSymFile->aShdrs[i].sh_info = 0;
1282 pSymFile->aShdrs[i].sh_addralign = 1;
1283 pSymFile->aShdrs[i].sh_entsize = 0;
1284 i++;
1285
1286 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1287
1288# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1289 /*
1290 * The program headers:
1291 */
1292 /* Everything in a single LOAD segment: */
1293 i = 0;
1294 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1295 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1296 pSymFile->aPhdrs[i].p_offset
1297 = pSymFile->aPhdrs[i].p_vaddr
1298 = pSymFile->aPhdrs[i].p_paddr = 0;
1299 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1300 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1301 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1302 i++;
1303 /* The .dynamic segment. */
1304 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1305 pSymFile->aPhdrs[i].p_flags = PF_R;
1306 pSymFile->aPhdrs[i].p_offset
1307 = pSymFile->aPhdrs[i].p_vaddr
1308 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1309 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1310 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1311 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1312 i++;
1313
1314 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1315
1316 /*
1317 * The dynamic section:
1318 */
1319 i = 0;
1320 pSymFile->aDyn[i].d_tag = DT_SONAME;
1321 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1322 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1323 i++;
1324 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1325 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1326 i++;
1327 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1328 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1329 i++;
1330 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1331 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1332 i++;
1333 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1334 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1335 i++;
1336 pSymFile->aDyn[i].d_tag = DT_NULL;
1337 i++;
1338 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1339# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1340
1341 /*
1342 * Symbol tables:
1343 */
1344 /** @todo gdb doesn't seem to really like this ... */
1345 i = 0;
1346 pSymFile->aSymbols[i].st_name = 0;
1347 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1348 pSymFile->aSymbols[i].st_value = 0;
1349 pSymFile->aSymbols[i].st_size = 0;
1350 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1351 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1352# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1353 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1354# endif
1355 i++;
1356
1357 pSymFile->aSymbols[i].st_name = 0;
1358 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1359 pSymFile->aSymbols[i].st_value = 0;
1360 pSymFile->aSymbols[i].st_size = 0;
1361 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1362 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1363 i++;
1364
1365 pSymFile->aSymbols[i].st_name = offStrTab;
1366 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1367# if 0
1368 pSymFile->aSymbols[i].st_shndx = iShText;
1369 pSymFile->aSymbols[i].st_value = 0;
1370# else
1371 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1372 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1373# endif
1374 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1375 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1376 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1377# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1378 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1379 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1380# endif
1381 i++;
1382
1383 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1384 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1385
1386 /*
1387 * The GDB JIT entry and informing GDB.
1388 */
1389 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1390# if 1
1391 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1392# else
1393 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1394# endif
1395
1396 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1397 RTCritSectEnter(&g_IemNativeGdbJitLock);
1398 pEhFrame->GdbJitEntry.pNext = NULL;
1399 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1400 if (__jit_debug_descriptor.pTail)
1401 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1402 else
1403 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1404 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1405 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1406
1407 /* Notify GDB: */
1408 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1409 __jit_debug_register_code();
1410 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1411 RTCritSectLeave(&g_IemNativeGdbJitLock);
1412
1413# else /* !IEMNATIVE_USE_GDB_JIT */
1414 RT_NOREF(pVCpu);
1415# endif /* !IEMNATIVE_USE_GDB_JIT */
1416
1417 return VINF_SUCCESS;
1418}
1419
1420# endif /* !RT_OS_WINDOWS */
1421#endif /* IN_RING3 */
1422
1423
1424/**
1425 * Adds another chunk to the executable memory allocator.
1426 *
1427 * This is used by the init code for the initial allocation and later by the
1428 * regular allocator function when it's out of memory.
1429 */
1430static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1431{
1432 /* Check that we've room for growth. */
1433 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1434 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1435
1436 /* Allocate a chunk. */
1437#ifdef RT_OS_DARWIN
1438 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1439#else
1440 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1441#endif
1442 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1443
1444#ifdef RT_OS_DARWIN
1445 /*
1446 * Because it is impossible to have an RWX memory allocation on macOS, try to remap the memory
1447 * chunk readable/executable somewhere else so we can save ourselves the hassle of switching between
1448 * protections when executable memory is allocated.
1449 */
1450 int rc = VERR_NO_EXEC_MEMORY;
1451 mach_port_t hPortTask = mach_task_self();
1452 mach_vm_address_t AddrChunk = (mach_vm_address_t)pvChunk;
1453 mach_vm_address_t AddrRemapped = 0;
1454 vm_prot_t ProtCur = 0;
1455 vm_prot_t ProtMax = 0;
1456 kern_return_t krc = mach_vm_remap(hPortTask, &AddrRemapped, pExecMemAllocator->cbChunk, 0,
1457 VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
1458 hPortTask, AddrChunk, FALSE, &ProtCur, &ProtMax,
1459 VM_INHERIT_NONE);
1460 if (krc == KERN_SUCCESS)
1461 {
1462 krc = mach_vm_protect(mach_task_self(), AddrRemapped, pExecMemAllocator->cbChunk, FALSE, VM_PROT_READ | VM_PROT_EXECUTE);
1463 if (krc == KERN_SUCCESS)
1464 rc = VINF_SUCCESS;
1465 else
1466 {
1467 AssertLogRelMsgFailed(("mach_vm_protect -> %d (%#x)\n", krc, krc));
1468 krc = mach_vm_deallocate(hPortTask, AddrRemapped, pExecMemAllocator->cbChunk);
1469 Assert(krc == KERN_SUCCESS);
1470 }
1471 }
1472 else
1473 AssertLogRelMsgFailed(("mach_vm_remap -> %d (%#x)\n", krc, krc));
1474 if (RT_FAILURE(rc))
1475 {
1476 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1477 return rc;
1478 }
1479
1480 void *pvChunkRx = (void *)AddrRemapped;
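    /* Illustrative summary: from here on pvChunk is the writable view and
       pvChunkRx the executable alias of the same pages; new code is emitted via
       the former and made visible to the instruction stream via
       iemExecMemAllocatorReadyForUse (sys_icache_invalidate on darwin). */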
1481#else
1482# if defined(IN_RING3) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
1483 int rc = VINF_SUCCESS;
1484# endif
1485 void *pvChunkRx = pvChunk;
1486#endif
1487
1488 /*
1489 * Add the chunk.
1490 *
1491 * This must be done before the unwind init so windows can allocate
1492 * memory from the chunk when using the alternative sub-allocator.
1493 */
1494 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = pvChunk;
1495 pExecMemAllocator->aChunks[idxChunk].pvChunkRx = pvChunkRx;
1496#ifdef IN_RING3
1497 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1498#endif
1499 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1500 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1501 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1502 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1503
1504 pExecMemAllocator->cChunks = idxChunk + 1;
1505 pExecMemAllocator->idxChunkHint = idxChunk;
1506
1507 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1508 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1509
1510 /* If there is a chunk context init callback, call it. */
1511#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
1512 pExecMemAllocator->aChunks[idxChunk].pCtx = iemNativeRecompileAttachExecMemChunkCtx(pVCpu, idxChunk);
1513 if (pExecMemAllocator->aChunks[idxChunk].pCtx)
1514#endif
1515 {
1516#ifdef IN_RING3
1517 /*
1518 * Initialize the unwind information (this cannot really fail atm).
1519 * (This sets pvUnwindInfo.)
1520 */
1521 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunkRx, idxChunk);
1522#endif
1523 }
1524#if defined(IN_RING3) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
1525 if (RT_SUCCESS(rc))
1526 { /* likely */ }
1527 else
1528 {
1529 /* Just in case the impossible happens, undo the above: */
1530 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1531 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1532 pExecMemAllocator->cChunks = idxChunk;
1533 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1534 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1535 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = NULL;
1536 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1537
1538# ifdef RT_OS_DARWIN
1539 krc = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx,
1540 pExecMemAllocator->cbChunk);
1541 Assert(krc == KERN_SUCCESS);
1542# endif
1543
1544 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1545 return rc;
1546 }
1547#endif
1548
1549 return VINF_SUCCESS;
1550}
1551
1552
1553/**
1554 * Initializes the executable memory allocator for native recompilation on the
1555 * calling EMT.
1556 *
1557 * @returns VBox status code.
1558 * @param pVCpu The cross context virtual CPU structure of the calling
1559 * thread.
1560 * @param cbMax The max size of the allocator.
1561 * @param cbInitial The initial allocator size.
1562 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1563 * dependent).
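 *
 * A minimal, illustrative call sketch (the sizes below are made-up example values,
 * not taken from this file):
 * @code
 *     int rc = iemExecMemAllocatorInit(pVCpu, _64M /*cbMax, illustrative*/,
 *                                      _16M /*cbInitial, illustrative*/, 0 /*cbChunk: use default*/);
 *     AssertLogRelRCReturn(rc, rc);
 * @endcode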
1564 */
1565int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1566{
1567 /*
1568 * Validate input.
1569 */
1570 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1571 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1572 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1573 || cbChunk == 0
1574 || ( RT_IS_POWER_OF_TWO(cbChunk)
1575 && cbChunk >= _1M
1576 && cbChunk <= _256M
1577 && cbChunk <= cbMax),
1578 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1579 VERR_OUT_OF_RANGE);
1580
1581 /*
1582 * Adjust/figure out the chunk size.
1583 */
1584 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1585 {
1586 if (cbMax >= _256M)
1587 cbChunk = _64M;
1588 else
1589 {
1590 if (cbMax < _16M)
1591 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1592 else
1593 cbChunk = (uint32_t)cbMax / 4;
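            /* ASMBitLastSetU32 returns a 1-based bit index, so this rounds a non-power-of-two
               size up to the next power of two. */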
1594 if (!RT_IS_POWER_OF_TWO(cbChunk))
1595 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1596 }
1597 }
1598#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
1599# if defined(RT_ARCH_AMD64)
1600 Assert(cbChunk <= _2G);
1601# elif defined(RT_ARCH_ARM64)
1602 if (cbChunk > _128M)
1603 cbChunk = _128M; /* Max relative branch distance is +/-2^(25+2) = +/-0x8000000 (134 217 728). */
1604# endif
1605#endif
1606
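    /* Round cbMax up to a whole number of chunks so cMaxChunks * cbChunk comes out exact (asserted below). */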
1607 if (cbChunk > cbMax)
1608 cbMax = cbChunk;
1609 else
1610 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1611 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1612 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1613
1614 /*
1615 * Allocate and initialize the allocator instance.
1616 */
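    /* Layout of the instance allocation: the allocator structure itself, followed by one
       allocation bitmap per possible chunk (one bit per allocation unit, hence the '+ 3'
       bits-to-bytes shift), and on non-Windows ring-3 hosts one eh_frame block per chunk. */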
1617 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
1618 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
1619 size_t cbNeeded = offBitmaps + cbBitmaps;
1620 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1621 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1622#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1623 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1624 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1625#endif
1626 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1627 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1628 VERR_NO_MEMORY);
1629 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1630 pExecMemAllocator->cbChunk = cbChunk;
1631 pExecMemAllocator->cMaxChunks = cMaxChunks;
1632 pExecMemAllocator->cChunks = 0;
1633 pExecMemAllocator->idxChunkHint = 0;
1634 pExecMemAllocator->cAllocations = 0;
1635 pExecMemAllocator->cbTotal = 0;
1636 pExecMemAllocator->cbFree = 0;
1637 pExecMemAllocator->cbAllocated = 0;
1638 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
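    /* One allocation unit is RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) bytes and each
       64-bit bitmap element tracks 64 such units, hence the '+ 6' in the shift below. */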
1639 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1640 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1641 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
1642#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1643 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1644#endif
1645 for (uint32_t i = 0; i < cMaxChunks; i++)
1646 {
1647 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1648 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1649 pExecMemAllocator->aChunks[i].pvChunkRw = NULL;
1650#ifdef IN_RING0
1651 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1652#else
1653 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1654#endif
1655 }
1656 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1657
1658 /*
1659 * Do the initial allocations.
1660 */
1661 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1662 {
1663 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1664 AssertLogRelRCReturn(rc, rc);
1665 }
1666
1667 pExecMemAllocator->idxChunkHint = 0;
1668
1669 /*
1670 * Register statistics.
1671 */
1672 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1673 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1674 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1675 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1676 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1677 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1678 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1679 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1680 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1681 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1682 "Number of bytes current allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1683 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1684 "Number of bytes current free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1685 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1686 "Total number of byte", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1687#ifdef VBOX_WITH_STATISTICS
1688 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1689 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1690#endif
1691#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
1692 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1693 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
1694 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
1695 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
1696#endif
1697
1698 return VINF_SUCCESS;
1699}
1700