VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 101261

Last change on this file since 101261 was 101261, checked in by vboxsync, 19 months ago

VMM/IEM: Corrected FNIEMNATIVETB so it clearly indicates that exceptions may be thrown (or longjmps made) from the translation block. Accidentally used the wrong macro and clang-1200.0.32.27 optimized the exception catching away in release builds on arm64. bugref:10370

1/* $Id: IEMAllN8veRecompiler.cpp 101261 2023-09-25 23:57:08Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : ...
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): ...
18 * - Level 12 (Log12): ...
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/heap.h>
59#include <iprt/mem.h>
60#include <iprt/string.h>
61#if defined(RT_ARCH_AMD64)
62# include <iprt/x86.h>
63#elif defined(RT_ARCH_ARM64)
64# include <iprt/armv8.h>
65#endif
66
67#ifdef RT_OS_WINDOWS
68# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
70extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
71#else
72# include <iprt/formats/dwarf.h>
73# if defined(RT_OS_DARWIN)
74# include <libkern/OSCacheControl.h>
75# define IEMNATIVE_USE_LIBUNWIND
76extern "C" void __register_frame(const void *pvFde);
77extern "C" void __deregister_frame(const void *pvFde);
78# else
79extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
80extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
81# endif
82#endif
83
84#include "IEMInline.h"
85#include "IEMThreadedFunctions.h"
86#include "IEMN8veRecompiler.h"
87
88
89/*
90 * Narrow down configs here to avoid wasting time on unused configs.
91 * Note! Same checks in IEMAllThrdRecompiler.cpp.
92 */
93
94#ifndef IEM_WITH_CODE_TLB
95# error The code TLB must be enabled for the recompiler.
96#endif
97
98#ifndef IEM_WITH_DATA_TLB
99# error The data TLB must be enabled for the recompiler.
100#endif
101
102#ifndef IEM_WITH_SETJMP
103# error The setjmp approach must be enabled for the recompiler.
104#endif
105
106
107/*********************************************************************************************************************************
108* Executable Memory Allocator *
109*********************************************************************************************************************************/
110/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
111 * Use an alternative chunk sub-allocator that does not store internal data
112 * in the chunk.
113 *
114 * Using the RTHeapSimple is not practical on newer darwin systems where
115 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
116 * memory. We would have to change the protection of the whole chunk for
117 * every call to RTHeapSimple, which would be rather expensive.
118 *
119 * This alternative implementation restricts page protection modifications
120 * to the pages backing the executable memory we just allocated.
121 */
122#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
123/** The chunk sub-allocation unit size in bytes. */
124#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
125/** The chunk sub-allocation unit size as a shift factor. */
126#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
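/* Worked example (illustrative): a 300 byte request is rounded up to
   RT_ALIGN_32(300, 128) = 384 bytes, i.e. 3 allocation units, and unit
   counts convert to byte offsets by shifting left by 7 (2^7 = 128). */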
127
128#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
129/**
130 * Per-chunk unwind info for non-windows hosts.
131 */
132typedef struct IEMEXECMEMCHUNKEHFRAME
133{
134# ifdef IEMNATIVE_USE_LIBUNWIND
135 /** The offset of the FDE into abEhFrame. */
136 uintptr_t offFda;
137# else
138 /** struct object storage area. */
139 uint8_t abObject[1024];
140# endif
141 /** The dwarf ehframe data for the chunk. */
142 uint8_t abEhFrame[512];
143} IEMEXECMEMCHUNKEHFRAME;
144/** Pointer to per-chunk unwind info for non-windows hosts. */
145typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
146#endif
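/* Note: abEhFrame is filled in per chunk by
   iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk below (a CIE, an FDE
   covering the whole chunk, and a terminator entry), while abObject is the
   opaque storage area handed to __register_frame_info on the non-libunwind hosts. */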
147
148
149/**
150 * A chunk of executable memory.
151 */
152typedef struct IEMEXECMEMCHUNK
153{
154#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 /** Number of free items in this chunk. */
156 uint32_t cFreeUnits;
157 /** Hint where to start searching for free space in the allocation bitmap. */
158 uint32_t idxFreeHint;
159#else
160 /** The heap handle. */
161 RTHEAPSIMPLE hHeap;
162#endif
163 /** Pointer to the chunk. */
164 void *pvChunk;
165#ifdef IN_RING3
166 /**
167 * Pointer to the unwind information.
168 *
169 * This is used during C++ throw and longjmp (windows and probably most other
169 * platforms). Some debuggers (windbg) make use of it as well.
171 *
172 * Windows: This is allocated from hHeap on windows because (at least for
173 * AMD64) the UNWIND_INFO structure address in the
174 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
175 *
176 * Others: Allocated from the regular heap to avoid unnecessary executable data
177 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
178 void *pvUnwindInfo;
179#elif defined(IN_RING0)
180 /** Allocation handle. */
181 RTR0MEMOBJ hMemObj;
182#endif
183} IEMEXECMEMCHUNK;
184/** Pointer to a memory chunk. */
185typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
186
187
188/**
189 * Executable memory allocator for the native recompiler.
190 */
191typedef struct IEMEXECMEMALLOCATOR
192{
193 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
194 uint32_t uMagic;
195
196 /** The chunk size. */
197 uint32_t cbChunk;
198 /** The maximum number of chunks. */
199 uint32_t cMaxChunks;
200 /** The current number of chunks. */
201 uint32_t cChunks;
202 /** Hint where to start looking for available memory. */
203 uint32_t idxChunkHint;
204 /** Statistics: Current number of allocations. */
205 uint32_t cAllocations;
206
207 /** The total amount of memory available. */
208 uint64_t cbTotal;
209 /** Total amount of free memory. */
210 uint64_t cbFree;
211 /** Total amount of memory allocated. */
212 uint64_t cbAllocated;
213
214#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
215 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
216 *
217 * Since the chunk size is a power of two and the minimum chunk size is a lot
218 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
219 * require a whole number of uint64_t elements in the allocation bitmap. So,
220 * for sake of simplicity, they are allocated as one continous chunk for
221 * simplicity/laziness. */
222 uint64_t *pbmAlloc;
223 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
224 uint32_t cUnitsPerChunk;
225 /** Number of bitmap elements per chunk (for quickly locating the bitmap
226 * portion corresponding to an chunk). */
227 uint32_t cBitmapElementsPerChunk;
228#else
229 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
230 * @{ */
231 /** The size of the heap internal block header. This is used to adjust the
232 * requested memory size to make sure there is exactly enough room for a header at
233 * the end of the blocks we allocate before the next 64 byte alignment line. */
234 uint32_t cbHeapBlockHdr;
235 /** The size of the initial heap allocation required to make sure the first
236 * allocation is correctly aligned. */
237 uint32_t cbHeapAlignTweak;
238 /** The alignment tweak allocation address. */
239 void *pvAlignTweak;
240 /** @} */
241#endif
242
243#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
244 /** Pointer to the array of unwind info running parallel to aChunks (same
245 * allocation as this structure, located after the bitmaps).
246 * (For Windows, the structures must reside in 32-bit RVA distance to the
247 * actual chunk, so they are allocated off the chunk.) */
248 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
249#endif
250
251 /** The allocation chunks. */
252 RT_FLEXIBLE_ARRAY_EXTENSION
253 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
254} IEMEXECMEMALLOCATOR;
255/** Pointer to an executable memory allocator. */
256typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
257
258/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
259#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
260
261
262static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator);
263
264
265/**
266 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
267 * the heap statistics.
268 */
269static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
270 uint32_t cbReq, uint32_t idxChunk)
271{
272 pExecMemAllocator->cAllocations += 1;
273 pExecMemAllocator->cbAllocated += cbReq;
274#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
275 pExecMemAllocator->cbFree -= cbReq;
276#else
277 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
278#endif
279 pExecMemAllocator->idxChunkHint = idxChunk;
280
281#ifdef RT_OS_DARWIN
282 /*
283 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
284 * on darwin. So, we mark the pages returned as read+write after alloc and
285 * expect the caller to call iemExecMemAllocatorReadyForUse when done
286 * writing to the allocation.
287 *
288 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
289 * for details.
290 */
291 /** @todo detect if this is necessary... it wasn't required on 10.15 or
292 * whatever older version it was. */
293 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
294 AssertRC(rc);
295#endif
296
297 return pvRet;
298}
299
300
301#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
302static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
303 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
304{
305 /*
306 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
307 */
308 Assert(!(cToScan & 63));
309 Assert(!(idxFirst & 63));
310 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
311 pbmAlloc += idxFirst / 64;
312
313 /*
314 * Scan the bitmap for a run of cReqUnits consecutive clear bits.
315 */
316 /** @todo This can probably be done more efficiently for non-x86 systems. */
317 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
318 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
319 {
320 uint32_t idxAddBit = 1;
321 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
322 idxAddBit++;
323 if (idxAddBit >= cReqUnits)
324 {
325 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
326
327 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
328 pChunk->cFreeUnits -= cReqUnits;
329 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
330
331 void * const pvRet = (uint8_t *)pChunk->pvChunk
332 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
333
334 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
335 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
336 }
337
338 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
339 }
340 return NULL;
341}
342#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
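/* Scan example (illustrative): with cReqUnits = 3 and a bitmap starting
   0,0,1,0,0,0,... (bit 0 = first unit, 1 = allocated), ASMBitFirstClear
   returns 0, the inner loop stops at the set bit 2, ASMBitNextClear resumes
   after bit 1 and lands on bit 3, and units 3..5 are then marked allocated
   and returned at pvChunk + ((idxFirst + 3) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */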
343
344
345static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
346{
347#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
348 /*
349 * Figure out how much to allocate.
350 */
351 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
352 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
353 {
354 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
355 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
356 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
357 {
358 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
359 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
360 if (pvRet)
361 return pvRet;
362 }
363 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
364 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
365 cReqUnits, idxChunk);
366 }
367#else
368 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
369 if (pvRet)
370 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
371#endif
372 return NULL;
373
374}
375
376
377/**
378 * Allocates @a cbReq bytes of executable memory.
379 *
380 * @returns Pointer to the memory, NULL if out of memory or other problem
381 * encountered.
382 * @param pVCpu The cross context virtual CPU structure of the calling
383 * thread.
384 * @param cbReq How many bytes are required.
385 */
386static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
387{
388 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
389 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
390 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
391
392 /*
393 * Adjust the request size so it'll fit the allocator alignment/whatnot.
394 *
395 * For the RTHeapSimple allocator this means to follow the logic described
396 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
397 * existing chunks if we think we've got sufficient free memory around.
398 *
399 * While for the alternative one we just align it up to a whole unit size.
400 */
401#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
402 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
403#else
404 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
405#endif
406 if (cbReq <= pExecMemAllocator->cbFree)
407 {
408 uint32_t const cChunks = pExecMemAllocator->cChunks;
409 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
410 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
411 {
412 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
413 if (pvRet)
414 return pvRet;
415 }
416 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
417 {
418 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
419 if (pvRet)
420 return pvRet;
421 }
422 }
423
424 /*
425 * Can we grow it with another chunk?
426 */
427 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
428 {
429 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
430 AssertLogRelRCReturn(rc, NULL);
431
432 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
433 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
434 if (pvRet)
435 return pvRet;
436 AssertFailed();
437 }
438
439 /* What now? Prune native translation blocks from the cache? */
440 AssertFailed();
441 return NULL;
442}
443
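/*
 * Usage sketch (illustrative only; pbTmpCode, cbCode and the copying step
 * are placeholders, the real flow lives in the translation block recompiler):
 *
 *      uint8_t *pbExec = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *      if (pbExec)
 *      {
 *          memcpy(pbExec, pbTmpCode, cbCode);                      // emit/copy the native code
 *          iemExecMemAllocatorReadyForUse(pVCpu, pbExec, cbCode);  // make it RX + flush icache (darwin)
 *          ...
 *          iemExecMemAllocatorFree(pVCpu, pbExec, cbCode);         // once the TB is retired
 *      }
 */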
444
445/** This is a hook that we may need later for changing memory protection back
446 * to readonly+exec */
447static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
448{
449#ifdef RT_OS_DARWIN
450 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
451 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
452 AssertRC(rc); RT_NOREF(pVCpu);
453
454 /*
455 * Flush the instruction cache:
456 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
457 */
458 /* sys_dcache_flush(pv, cb); - not necessary */
459 sys_icache_invalidate(pv, cb);
460#else
461 RT_NOREF(pVCpu, pv, cb);
462#endif
463}
464
465
466/**
467 * Frees executable memory.
468 */
469void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
470{
471 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
472 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
473 Assert(pv);
474#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
475 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
476#else
477 Assert(!((uintptr_t)pv & 63));
478#endif
479
480 /* Align the size as we did when allocating the block. */
481#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
482 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
483#else
484 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
485#endif
486
487 /* Free it / assert sanity. */
488#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
489 uint32_t const cChunks = pExecMemAllocator->cChunks;
490 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
491 bool fFound = false;
492 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
493 {
494 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
495 fFound = offChunk < cbChunk;
496 if (fFound)
497 {
498#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
499 uint32_t const idxFirst = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
500 uint32_t const cReqUnits = cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
501
502 /* Check that it's valid and free it. */
503 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
504 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
505 for (uint32_t i = 1; i < cReqUnits; i++)
506 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
507 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
508
509 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
510 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
511
512 /* Update the stats. */
513 pExecMemAllocator->cbAllocated -= cb;
514 pExecMemAllocator->cbFree += cb;
515 pExecMemAllocator->cAllocations -= 1;
516 return;
517#else
518 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
519 break;
520#endif
521 }
522 }
523# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
524 AssertFailed();
525# else
526 Assert(fFound);
527# endif
528#endif
529
530#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
531 /* Update stats while cb is freshly calculated. */
532 pExecMemAllocator->cbAllocated -= cb;
533 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
534 pExecMemAllocator->cAllocations -= 1;
535
536 /* Free it. */
537 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
538#endif
539}
540
541
542
543#ifdef IN_RING3
544# ifdef RT_OS_WINDOWS
545
546/**
547 * Initializes the unwind info structures for windows hosts.
548 */
549static int
550iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
551{
552 /*
553 * The AMD64 unwind opcodes.
554 *
555 * This is a program that starts with RSP after a RET instruction that
556 * ends up in recompiled code, and the operations we describe here will
557 * restore all non-volatile registers and bring RSP back to where our
558 * RET address is. This means it's reverse order from what happens in
559 * the prologue.
560 *
561 * Note! Using a frame register approach here both because we have one,
562 * but mainly because the UWOP_ALLOC_LARGE argument values
563 * would be a pain to write initializers for. On the positive
564 * side, we're impervious to changes in the stack variable
565 * area and can deal with dynamic stack allocations if necessary.
566 */
567 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
568 {
569 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
570 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
571 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
572 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
573 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
574 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
575 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
576 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
577 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
578 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
579 };
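    /* Read bottom-up, the table mirrors (in reverse) the relevant part of the
       prologue the recompiler emits elsewhere: push rbp/rbx/rsi/rdi/r12..r15,
       an additional 8 byte stack adjustment, and finally establishing RBP as
       the frame register. */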
580 union
581 {
582 IMAGE_UNWIND_INFO Info;
583 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
584 } s_UnwindInfo =
585 {
586 {
587 /* .Version = */ 1,
588 /* .Flags = */ 0,
589 /* .SizeOfProlog = */ 16, /* whatever */
590 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
591 /* .FrameRegister = */ X86_GREG_xBP,
592 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
593 }
594 };
595 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
596 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
597
598 /*
599 * Calc how much space we need and allocate it off the exec heap.
600 */
601 unsigned const cFunctionEntries = 1;
602 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
603 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
604# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
606 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
607 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
608# else
609 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
610 - pExecMemAllocator->cbHeapBlockHdr;
611 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
612 32 /*cbAlignment*/);
613# endif
614 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
615 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
616
617 /*
618 * Initialize the structures.
619 */
620 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
621
622 paFunctions[0].BeginAddress = 0;
623 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
624 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
625
626 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
627 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
628
629 /*
630 * Register it.
631 */
632 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
633 AssertReturn(fRet, NULL); /* Nothing to clean up on failure, since it's within the chunk itself. */
634
635 return paFunctions;
636}
637
638
639# else /* !RT_OS_WINDOWS */
640
641/**
642 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
643 */
644DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
645{
646 if (iValue >= 64)
647 {
648 Assert(iValue < 0x2000);
649 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
650 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
651 }
652 else if (iValue >= 0)
653 *Ptr.pb++ = (uint8_t)iValue;
654 else if (iValue > -64)
655 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
656 else
657 {
658 Assert(iValue > -0x2000);
659 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
660 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
661 }
662 return Ptr;
663}
664
665
666/**
667 * Emits an ULEB128 encoded value (up to 64-bit wide).
668 */
669DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
670{
671 while (uValue >= 0x80)
672 {
673 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
674 uValue >>= 7;
675 }
676 *Ptr.pb++ = (uint8_t)uValue;
677 return Ptr;
678}
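/* Encoding examples (illustrative): iemDwarfPutUleb128(Ptr, 300) emits the
   bytes 0xac 0x02 (0x2c + 2*128), while iemDwarfPutLeb128(Ptr, -100) emits
   0x9c 0x7f (two's complement, 7 bits per byte, bit 6 of the last byte
   acting as the sign bit). */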
679
680
681/**
682 * Emits a CFA rule as register @a uReg + offset @a off.
683 */
684DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
685{
686 *Ptr.pb++ = DW_CFA_def_cfa;
687 Ptr = iemDwarfPutUleb128(Ptr, uReg);
688 Ptr = iemDwarfPutUleb128(Ptr, off);
689 return Ptr;
690}
691
692
693/**
694 * Emits a register (@a uReg) save location:
695 * CFA + @a off * data_alignment_factor
696 */
697DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
698{
699 if (uReg < 0x40)
700 *Ptr.pb++ = DW_CFA_offset | uReg;
701 else
702 {
703 *Ptr.pb++ = DW_CFA_offset_extended;
704 Ptr = iemDwarfPutUleb128(Ptr, uReg);
705 }
706 Ptr = iemDwarfPutUleb128(Ptr, off);
707 return Ptr;
708}
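/* Example (illustrative): with the data alignment factor of -8 established in
   the CIE below, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) tells the
   unwinder that the caller's RBP was saved at CFA - 16. */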
709
710
711# if 0 /* unused */
712/**
713 * Emits a register (@a uReg) save location, using signed offset:
714 * CFA + @a offSigned * data_alignment_factor
715 */
716DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
717{
718 *Ptr.pb++ = DW_CFA_offset_extended_sf;
719 Ptr = iemDwarfPutUleb128(Ptr, uReg);
720 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
721 return Ptr;
722}
723# endif
724
725
726/**
727 * Initializes the unwind info section for non-windows hosts.
728 */
729static int
730iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
731{
732 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
733 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
734
735 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
736
737 /*
738 * Generate the CIE first.
739 */
740# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
741 uint8_t const iDwarfVer = 3;
742# else
743 uint8_t const iDwarfVer = 4;
744# endif
745 RTPTRUNION const PtrCie = Ptr;
746 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
747 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
748 *Ptr.pb++ = iDwarfVer; /* DWARF version */
749 *Ptr.pb++ = 0; /* Augmentation. */
750 if (iDwarfVer >= 4)
751 {
752 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
753 *Ptr.pb++ = 0; /* Segment selector size. */
754 }
755# ifdef RT_ARCH_AMD64
756 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
757# else
758 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
759# endif
760 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
761# ifdef RT_ARCH_AMD64
762 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
763# elif defined(RT_ARCH_ARM64)
764 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
765# else
766# error "port me"
767# endif
768 /* Initial instructions: */
769# ifdef RT_ARCH_AMD64
770 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
771 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
772 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
773 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
774 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
775 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
776 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
777 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
778# elif defined(RT_ARCH_ARM64)
779# if 1
780 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
781# else
782 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
783# endif
784 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
785 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
786 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
787 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
788 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
789 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
790 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
791 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
792 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
793 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
794 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
795 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
796 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
797 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
798# else
799# error "port me"
800# endif
801 while ((Ptr.u - PtrCie.u) & 3)
802 *Ptr.pb++ = DW_CFA_nop;
803 /* Finalize the CIE size. */
804 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
805
806 /*
807 * Generate an FDE for the whole chunk area.
808 */
809# ifdef IEMNATIVE_USE_LIBUNWIND
810 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
811# endif
812 RTPTRUNION const PtrFde = Ptr;
813 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
814 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
815 Ptr.pu32++;
816 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
817 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
818# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
819 *Ptr.pb++ = DW_CFA_nop;
820# endif
821 while ((Ptr.u - PtrFde.u) & 3)
822 *Ptr.pb++ = DW_CFA_nop;
823 /* Finalize the FDE size. */
824 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
825
826 /* Terminator entry. */
827 *Ptr.pu32++ = 0;
828 *Ptr.pu32++ = 0; /* just to be sure... */
829 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
830
831 /*
832 * Register it.
833 */
834# ifdef IEMNATIVE_USE_LIBUNWIND
835 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
836# else
837 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
838 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
839# endif
840
841 return VINF_SUCCESS;
842}
843
844# endif /* !RT_OS_WINDOWS */
845#endif /* IN_RING3 */
846
847
848/**
849 * Adds another chunk to the executable memory allocator.
850 *
851 * This is used by the init code for the initial allocation and later by the
852 * regular allocator function when it's out of memory.
853 */
854static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator)
855{
856 /* Check that we've room for growth. */
857 uint32_t const idxChunk = pExecMemAllocator->cChunks;
858 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
859
860 /* Allocate a chunk. */
861#ifdef RT_OS_DARWIN
862 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
863#else
864 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
865#endif
866 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
867
868#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
869 int rc = VINF_SUCCESS;
870#else
871 /* Initialize the heap for the chunk. */
872 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
873 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
874 AssertRC(rc);
875 if (RT_SUCCESS(rc))
876 {
877 /*
878 * We want the memory to be aligned on 64 byte, so the first time thru
879 * here we do some exploratory allocations to see how we can achieve this.
880 * On subsequent runs we only make an initial adjustment allocation, if
881 * necessary.
882 *
883 * Since we own the heap implementation, we know that the internal block
884 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
885 * so all we need to do wrt allocation size adjustments is to add 32 bytes
886 * to the size, align up by 64 bytes, and subtract 32 bytes.
887 *
888 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
889 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
890 * allocation to force subsequent allocations to return 64 byte aligned
891 * user areas.
892 */
893 if (!pExecMemAllocator->cbHeapBlockHdr)
894 {
895 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
896 pExecMemAllocator->cbHeapAlignTweak = 64;
897 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
898 32 /*cbAlignment*/);
899 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
900
901 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
902 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
903 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
904 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
905 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
906
907 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
908 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
909 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
910 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
911 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
912
913 RTHeapSimpleFree(hHeap, pvTest2);
914 RTHeapSimpleFree(hHeap, pvTest1);
915 }
916 else
917 {
918 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
919 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
920 }
921 if (RT_SUCCESS(rc))
922#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
923 {
924 /*
925 * Add the chunk.
926 *
927 * This must be done before the unwind init so windows can allocate
928 * memory from the chunk when using the alternative sub-allocator.
929 */
930 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
931#ifdef IN_RING3
932 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
933#endif
934#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
935 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
936#else
937 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
938 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
939 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
940 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
941#endif
942
943 pExecMemAllocator->cChunks = idxChunk + 1;
944 pExecMemAllocator->idxChunkHint = idxChunk;
945
946#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
947 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
948 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
949#else
950 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
951 pExecMemAllocator->cbTotal += cbFree;
952 pExecMemAllocator->cbFree += cbFree;
953#endif
954
955#ifdef IN_RING3
956 /*
957 * Initialize the unwind information (this cannot really fail atm).
958 * (This sets pvUnwindInfo.)
959 */
960 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, pvChunk, idxChunk);
961 if (RT_SUCCESS(rc))
962#endif
963 {
964 return VINF_SUCCESS;
965 }
966
967#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
968 /* Just in case the impossible happens, undo the above: */
969 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
970 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
971 pExecMemAllocator->cChunks = idxChunk;
972 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
973 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
974 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
975 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
976#endif
977 }
978#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
979 }
980#endif
981 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
982 return rc;
983}
984
985
986/**
987 * Initializes the executable memory allocator for native recompilation on the
988 * calling EMT.
989 *
990 * @returns VBox status code.
991 * @param pVCpu The cross context virtual CPU structure of the calling
992 * thread.
993 * @param cbMax The max size of the allocator.
994 * @param cbInitial The initial allocator size.
995 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
996 * dependent).
997 */
998int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
999{
1000 /*
1001 * Validate input.
1002 */
1003 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1004 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1005 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1006 || cbChunk == 0
1007 || ( RT_IS_POWER_OF_TWO(cbChunk)
1008 && cbChunk >= _1M
1009 && cbChunk <= _256M
1010 && cbChunk <= cbMax),
1011 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1012 VERR_OUT_OF_RANGE);
1013
1014 /*
1015 * Adjust/figure out the chunk size.
1016 */
1017 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1018 {
1019 if (cbMax >= _256M)
1020 cbChunk = _64M;
1021 else
1022 {
1023 if (cbMax < _16M)
1024 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1025 else
1026 cbChunk = (uint32_t)cbMax / 4;
1027 if (!RT_IS_POWER_OF_TWO(cbChunk))
1028 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1029 }
1030 }
1031
1032 if (cbChunk > cbMax)
1033 cbMax = cbChunk;
1034 else
1035 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1036 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1037 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1038
1039 /*
1040 * Allocate and initialize the allocator instance.
1041 */
1042 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1043#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1044 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1045 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1046 cbNeeded += cbBitmap * cMaxChunks;
1047 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1048 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1049#endif
1050#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1051 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1052 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1053#endif
1054 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1055 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1056 VERR_NO_MEMORY);
1057 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1058 pExecMemAllocator->cbChunk = cbChunk;
1059 pExecMemAllocator->cMaxChunks = cMaxChunks;
1060 pExecMemAllocator->cChunks = 0;
1061 pExecMemAllocator->idxChunkHint = 0;
1062 pExecMemAllocator->cAllocations = 0;
1063 pExecMemAllocator->cbTotal = 0;
1064 pExecMemAllocator->cbFree = 0;
1065 pExecMemAllocator->cbAllocated = 0;
1066#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1067 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1068 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1069 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1070 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1071#endif
1072#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1073 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1074#endif
1075 for (uint32_t i = 0; i < cMaxChunks; i++)
1076 {
1077#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1078 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1079 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1080#else
1081 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1082#endif
1083 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1084#ifdef IN_RING0
1085 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1086#else
1087 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1088#endif
1089 }
1090 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1091
1092 /*
1093 * Do the initial allocations.
1094 */
1095 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1096 {
1097 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
1098 AssertLogRelRCReturn(rc, rc);
1099 }
1100
1101 pExecMemAllocator->idxChunkHint = 0;
1102
1103 return VINF_SUCCESS;
1104}
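/* Sizing example (illustrative): for cbMax = 64 MiB and cbChunk = 0 the code
   above picks cbChunk = 64 MiB / 4 = 16 MiB (already a power of two), keeps
   cbMax at 64 MiB and thus arrives at cMaxChunks = 4. */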
1105
1106
1107/*********************************************************************************************************************************
1108* Native Recompilation *
1109*********************************************************************************************************************************/
1110
1111
1112/**
1113 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1114 */
1115IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1116{
1117 pVCpu->iem.s.cInstructions += idxInstr;
1118 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1119}
1120
1121
1122/**
1123 * Reinitializes the native recompiler state.
1124 *
1125 * Called before starting a new recompile job.
1126 */
1127static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative)
1128{
1129 pReNative->cLabels = 0;
1130 pReNative->cFixups = 0;
1131 return pReNative;
1132}
1133
1134
1135/**
1136 * Allocates and initializes the native recompiler state.
1137 *
1138 * This is called the first time an EMT wants to recompile something.
1139 *
1140 * @returns Pointer to the new recompiler state.
1141 * @param pVCpu The cross context virtual CPU structure of the calling
1142 * thread.
1143 * @thread EMT(pVCpu)
1144 */
1145static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu)
1146{
1147 VMCPU_ASSERT_EMT(pVCpu);
1148
1149 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1150 AssertReturn(pReNative, NULL);
1151
1152 /*
1153 * Try allocate all the buffers and stuff we need.
1154 */
1155 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1156 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1157 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1158 if (RT_LIKELY( pReNative->pInstrBuf
1159 && pReNative->paLabels
1160 && pReNative->paFixups))
1161 {
1162 /*
1163 * Set the buffer & array sizes on success.
1164 */
1165 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1166 pReNative->cLabelsAlloc = _8K;
1167 pReNative->cFixupsAlloc = _16K;
1168
1169 /*
1170 * Done, just need to save it and reinit it.
1171 */
1172 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1173 return iemNativeReInit(pReNative);
1174 }
1175
1176 /*
1177 * Failed. Cleanup and return.
1178 */
1179 AssertFailed();
1180 RTMemFree(pReNative->pInstrBuf);
1181 RTMemFree(pReNative->paLabels);
1182 RTMemFree(pReNative->paFixups);
1183 RTMemFree(pReNative);
1184 return NULL;
1185}
1186
1187
1188/**
1189 * Defines a label.
1190 *
1191 * @returns Label ID.
1192 * @param pReNative The native recompile state.
1193 * @param enmType The label type.
1194 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1195 * label is not yet defined (default).
1196 * @param uData Data associated with the label. Only applicable to
1197 * certain types of labels. Default is zero.
1198 */
1199DECLHIDDEN(uint32_t) iemNativeMakeLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1200 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
1201{
1202 /*
1203 * Do we have the label already?
1204 */
1205 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1206 uint32_t const cLabels = pReNative->cLabels;
1207 for (uint32_t i = 0; i < cLabels; i++)
1208 if ( paLabels[i].enmType == enmType
1209 && paLabels[i].uData == uData)
1210 {
1211 if (paLabels[i].off == offWhere || offWhere == UINT32_MAX)
1212 return i;
1213 if (paLabels[i].off == UINT32_MAX)
1214 {
1215 paLabels[i].off = offWhere;
1216 return i;
1217 }
1218 }
1219
1220 /*
1221 * Make sure we've got room for another label.
1222 */
1223 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1224 { /* likely */ }
1225 else
1226 {
1227 uint32_t cNew = pReNative->cLabelsAlloc;
1228 AssertReturn(cNew, UINT32_MAX);
1229 AssertReturn(cLabels == cNew, UINT32_MAX);
1230 cNew *= 2;
1231 AssertReturn(cNew <= _64K, UINT32_MAX); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1232 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1233 AssertReturn(paLabels, UINT32_MAX);
1234 pReNative->paLabels = paLabels;
1235 pReNative->cLabelsAlloc = cNew;
1236 }
1237
1238 /*
1239 * Define a new label.
1240 */
1241 paLabels[cLabels].off = offWhere;
1242 paLabels[cLabels].enmType = enmType;
1243 paLabels[cLabels].uData = uData;
1244 pReNative->cLabels = cLabels + 1;
1245 return cLabels;
1246}
1247
1248
1249/**
1250 * Looks up a label.
1251 *
1252 * @returns Label ID if found, UINT32_MAX if not.
1253 */
1254static uint32_t iemNativeFindLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1255 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1256{
1257 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1258 uint32_t const cLabels = pReNative->cLabels;
1259 for (uint32_t i = 0; i < cLabels; i++)
1260 if ( paLabels[i].enmType == enmType
1261 && paLabels[i].uData == uData
1262 && ( paLabels[i].off == offWhere
1263 || offWhere == UINT32_MAX
1264 || paLabels[i].off == UINT32_MAX))
1265 return i;
1266 return UINT32_MAX;
1267}
1268
1269
1270
1271/**
1272 * Adds a fixup.
1273 *
1274 * @returns Success indicator.
1275 * @param pReNative The native recompile state.
1276 * @param offWhere The instruction offset of the fixup location.
1277 * @param idxLabel The target label ID for the fixup.
1278 * @param enmType The fixup type.
1279 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1280 */
1281DECLHIDDEN(bool) iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1282 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/) RT_NOEXCEPT
1283{
1284 Assert(idxLabel <= UINT16_MAX);
1285 Assert((unsigned)enmType <= UINT8_MAX);
1286
1287 /*
1288 * Make sure we've room.
1289 */
1290 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1291 uint32_t const cFixups = pReNative->cFixups;
1292 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1293 { /* likely */ }
1294 else
1295 {
1296 uint32_t cNew = pReNative->cFixupsAlloc;
1297 AssertReturn(cNew, false);
1298 AssertReturn(cFixups == cNew, false);
1299 cNew *= 2;
1300 AssertReturn(cNew <= _128K, false);
1301 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1302 AssertReturn(paFixups, false);
1303 pReNative->paFixups = paFixups;
1304 pReNative->cFixupsAlloc = cNew;
1305 }
1306
1307 /*
1308 * Add the fixup.
1309 */
1310 paFixups[cFixups].off = offWhere;
1311 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1312 paFixups[cFixups].enmType = enmType;
1313 paFixups[cFixups].offAddend = offAddend;
1314 pReNative->cFixups = cFixups + 1;
1315 return true;
1316}
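/*
 * Note: together iemNativeMakeLabel and iemNativeAddFixup implement forward
 * references: a label may be created with offWhere = UINT32_MAX, jump sites
 * record a fixup against its ID while emitting placeholder displacement
 * bytes (see iemNativeEmitCheckCallRetAndPassUp below), and the label offset
 * is filled in later, e.g. by iemNativeEmitRcFiddling.
 */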
1317
1318/**
1319 * Slow code path for iemNativeInstrBufEnsure.
1320 */
1321DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1322 uint32_t cInstrReq) RT_NOEXCEPT
1323{
1324 /* Double the buffer size till we meet the request. */
1325 uint32_t cNew = pReNative->cInstrBufAlloc;
1326 AssertReturn(cNew > 0, NULL);
1327 do
1328 cNew *= 2;
1329 while (cNew < off + cInstrReq);
1330
1331 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1332 AssertReturn(cbNew <= _2M, NULL);
1333
1334 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1335 AssertReturn(pvNew, NULL);
1336
1337 pReNative->cInstrBufAlloc = cNew;
1338 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1339}
1340
1341
1342/**
1343 * Emits a code for checking the return code of a call and rcPassUp, returning
1344 * from the code if either are non-zero.
1345 */
1346DECLHIDDEN(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1347 uint8_t idxInstr) RT_NOEXCEPT
1348{
1349#ifdef RT_ARCH_AMD64
1350 /*
1351 * AMD64: eax = call status code.
1352 */
1353
1354 /* edx = rcPassUp */
1355 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
1356 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1357
1358 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1359 AssertReturn(pbCodeBuf, UINT32_MAX);
1360
1361 /* edx = eax | rcPassUp */
1362 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
1363 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
1364
1365 /* Jump to non-zero status return path, loading cl with the instruction number. */
1366 pbCodeBuf[off++] = 0xb0 + X86_GREG_xCX; /* mov cl, imm8 (pCallEntry->idxInstr) */
1367 pbCodeBuf[off++] = idxInstr;
1368
1369 pbCodeBuf[off++] = 0x0f; /* jnz rel32 */
1370 pbCodeBuf[off++] = 0x85;
1371 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
1372 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
1373 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4), UINT32_MAX);
1374 pbCodeBuf[off++] = 0x00;
1375 pbCodeBuf[off++] = 0x00;
1376 pbCodeBuf[off++] = 0x00;
1377 pbCodeBuf[off++] = 0x00;
1378
1379 /* done. */
1380
1381#elif RT_ARCH_ARM64
1382 /*
1383 * ARM64: w0 = call status code.
1384 */
1385 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
1386 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
1387
1388 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1389 AssertReturn(pu32CodeBuf, UINT32_MAX);
1390
1391 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
1392
1393 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
1394 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
1395 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5), UINT32_MAX);
1396 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
1397
1398#else
1399# error "port me"
1400#endif
1401 return off;
1402}
1403
1404
1405/**
1406 * Emits a call to a threaded worker function.
1407 */
1408static int32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
1409{
1410#ifdef VBOX_STRICT
1411 off = iemNativeEmitMarker(pReNative, off);
1412 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1413#endif
1414 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
1415
1416#ifdef RT_ARCH_AMD64
1417 /* Load the parameters and emit the call. */
1418# ifdef RT_OS_WINDOWS
1419# ifndef VBOXSTRICTRC_STRICT_ENABLED
1420 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
1421 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1422 if (cParams > 0)
1423 {
1424 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
1425 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1426 }
1427 if (cParams > 1)
1428 {
1429 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
1430 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1431 }
1432 if (cParams > 2)
1433 {
1434 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
1435 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1436 }
1437# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
1438 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
1439 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1440 if (cParams > 0)
1441 {
1442 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
1443 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1444 }
1445 if (cParams > 1)
1446 {
1447 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
1448 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1449 }
1450 if (cParams > 2)
1451 {
1452 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
1453 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1454 }
1455 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
1456 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1457 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
1458 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1459# endif /* VBOXSTRICTRC_STRICT_ENABLED */
1460# else
1461 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
1462 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1463 if (cParams > 0)
1464 {
1465 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
1466 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1467 }
1468 if (cParams > 1)
1469 {
1470 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
1471 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1472 }
1473 if (cParams > 2)
1474 {
1475 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
1476 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1477 }
1478# endif
1479 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
1480 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1481
1482 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1483 AssertReturn(pbCodeBuf, UINT32_MAX);
1484 pbCodeBuf[off++] = 0xff; /* call rax */
1485 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
1486
1487# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
1488 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
1489# endif
1490
1491#elif RT_ARCH_ARM64
1492 /*
1493 * ARM64:
1494 */
1495 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1496 if (cParams > 0)
1497 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
1498 if (cParams > 1)
1499 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
1500 if (cParams > 2)
1501 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
1502 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0,
1503 (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
1504
1505 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1506 AssertReturn(pu32CodeBuf, UINT32_MAX);
1507
1508 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
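 /* BLR branches with link through the temporary register, i.e. the ARM64 counterpart
    of the indirect 'call rax' emitted for AMD64 above. */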
1509
1510#else
1511# error "port me"
1512#endif
1513
1514 /*
1515 * Check the status code.
1516 */
1517 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
1518 AssertReturn(off != UINT32_MAX, off);
1519
1520 return off;
1521}
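/*
 * Illustrative sketch only (not part of the generated code): the sequence emitted by
 * iemNativeEmitThreadedCall corresponds roughly to the following C call, assuming the
 * FNIEMTHREADEDFUNC signature of pVCpu plus up to three uint64_t parameters (unused
 * parameters are simply not loaded):
 *
 *     VBOXSTRICTRC rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](
 *                                 pVCpu, pCallEntry->auParams[0],
 *                                 pCallEntry->auParams[1], pCallEntry->auParams[2]);
 *
 * followed by the status / rcPassUp check emitted by iemNativeEmitCheckCallRetAndPassUp.
 */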
1522
1523
1524/**
1525 * Emits the non-zero status code (rc + rcPassUp) fiddling code for the epilog.
1526 */
1527static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
1528{
1529 /*
1530 * Generate the rc + rcPassUp fiddling code if needed.
1531 */
1532 uint32_t idxLabel = iemNativeFindLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
1533 if (idxLabel != UINT32_MAX)
1534 {
1535 Assert(pReNative->paLabels[idxLabel].off == UINT32_MAX);
1536 pReNative->paLabels[idxLabel].off = off;
1537
1538 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
1539#ifdef RT_ARCH_AMD64
1540 /*
1541 * AMD64:
1542 */
1543 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
1544 AssertReturn(pbCodeBuf, UINT32_MAX);
1545
1546 /* Call helper and jump to return point. */
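 /* At this point xAX holds the status code returned by the threaded function and xCX
    presumably the instruction number set up by the per-call check code; shuffle them
    into the argument registers of the host calling convention. */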
1547# ifdef RT_OS_WINDOWS
1548 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
1549 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
1551 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1552 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
1553 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1554# else
1555 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
1556 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1557 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
1558 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1559 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
1560 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1561# endif
1562 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
1563 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1564
1565 pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1566 AssertReturn(pbCodeBuf, UINT32_MAX);
1567 pbCodeBuf[off++] = 0xff; /* call rax */
1568 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
1569
1570 /* Jump to common return point. */
1571 uint32_t offRel = pReNative->paLabels[idxReturnLabel].off - (off + 2);
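 /* Note: offRel was computed for the 2-byte short jump; the near form below is 5 bytes
    long, hence the additional -3 adjustment. */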
1572 if (-(int32_t)offRel <= 127)
1573 {
1574 pbCodeBuf[off++] = 0xeb; /* jmp rel8 */
1575 pbCodeBuf[off++] = (uint8_t)offRel;
1576 off++;
1577 }
1578 else
1579 {
1580 offRel -= 3;
1581 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */
1582 pbCodeBuf[off++] = RT_BYTE1(offRel);
1583 pbCodeBuf[off++] = RT_BYTE2(offRel);
1584 pbCodeBuf[off++] = RT_BYTE3(offRel);
1585 pbCodeBuf[off++] = RT_BYTE4(offRel);
1586 }
1587 pbCodeBuf[off++] = 0xcc; /* int3 poison */
1588
1589#elif RT_ARCH_ARM64
1590 /*
1591 * ARM64:
1592 */
1593 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
1594 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1595 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1596 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1597 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
1598 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
1599 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1600
1601 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1602 AssertReturn(pu32CodeBuf, UINT32_MAX);
1603 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
1604
1605 /* Jump back to the common return point. */
1606 int32_t const offRel = pReNative->paLabels[idxReturnLabel].off - off;
1607 pu32CodeBuf[off++] = Armv8A64MkInstrB(offRel);
1608#else
1609# error "port me"
1610#endif
1611 }
1612 return off;
1613}
1614
1615
1616/**
1617 * Emits a standard epilog.
1618 */
1619static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1620{
1621 /*
1622 * Successful return, so clear the return register (eax, w0).
1623 */
1624 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
1625 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1626
1627 /*
1628 * Define label for common return point.
1629 */
1630 uint32_t const idxReturn = iemNativeMakeLabel(pReNative, kIemNativeLabelType_Return, off);
1631 AssertReturn(idxReturn != UINT32_MAX, UINT32_MAX);
1632
1633 /*
1634 * Restore registers and return.
1635 */
1636#ifdef RT_ARCH_AMD64
1637 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
1638 AssertReturn(pbCodeBuf, UINT32_MAX);
1639
1640 /* Reposition rsp at the r15 restore point. */
1641 pbCodeBuf[off++] = X86_OP_REX_W;
1642 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
1643 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
1644 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
1645
1646 /* Pop non-volatile registers and return */
1647 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
1648 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
1649 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
1650 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
1651 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
1652 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
1653 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
1654 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
1655# ifdef RT_OS_WINDOWS
1656 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
1657 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
1658# endif
1659 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
1660 pbCodeBuf[off++] = 0xc9; /* leave */
1661 pbCodeBuf[off++] = 0xc3; /* ret */
1662 pbCodeBuf[off++] = 0xcc; /* int3 poison */
1663
1664#elif RT_ARCH_ARM64
1665 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1666 AssertReturn(pu32CodeBuf, UINT32_MAX);
1667
1668 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
1669 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
1670 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
1671 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
1672 IEMNATIVE_FRAME_VAR_SIZE / 8);
1673 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
1674 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1675 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
1676 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1677 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
1678 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1679 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
1680 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1681 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
1682 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1683 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
1684 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1685
1686 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
1687 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
1688 pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE);
1689
1690 /* retab / ret */
1691# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
1692 if (1)
1693 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
1694 else
1695# endif
1696 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
1697
1698#else
1699# error "port me"
1700#endif
1701
1702 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
1703}
1704
1705
1706/**
1707 * Emits a standard prolog.
1708 */
1709static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1710{
1711#ifdef RT_ARCH_AMD64
1712 /*
1713 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
1714 * reserving 64 bytes for stack variables plus 4 non-register argument
1715 * slots. Fixed register assignment: xBX = pVCpu;
1716 *
1717 * Since we always do the same register spilling, we can use the same
1718 * unwind description for all the code.
1719 */
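 /* Resulting frame layout, roughly (illustrative only; the exact offsets follow from
  * the IEMNATIVE_FRAME_* defines):
  *      [rbp+08h]  return address
  *      [rbp+00h]  saved rbp
  *      [rbp-08h]  saved rbx (the pVCpu home register)
  *      ...        saved rsi+rdi (Windows only) and r12 thru r15
  *      [rsp...]   variable area, stack argument slots and (Windows) shadow space
  */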
1720 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1721 AssertReturn(pbCodeBuf, UINT32_MAX);
1722 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
1723 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
1724 pbCodeBuf[off++] = 0x8b;
1725 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
1726 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
1727 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
1728# ifdef RT_OS_WINDOWS
1729 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
1730 pbCodeBuf[off++] = 0x8b;
1731 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
1732 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
1733 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
1734# else
1735 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
1736 pbCodeBuf[off++] = 0x8b;
1737 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
1738# endif
1739 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
1740 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
1741 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
1742 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
1743 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
1744 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
1745 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
1746 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
1747
1748 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
1749 X86_GREG_xSP,
1750 IEMNATIVE_FRAME_ALIGN_SIZE
1751 + IEMNATIVE_FRAME_VAR_SIZE
1752 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
1753 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
1754 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
1755 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
1756 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
1757
1758#elif RT_ARCH_ARM64
1759 /*
1760 * We set up a stack frame exactly like on AMD64, except that we have to
1761 * save the return address (LR) ourselves here. We save all non-volatile registers.
1762 */
1763 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1764 AssertReturn(pu32CodeBuf, UINT32_MAX);
1765
1766# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have
1767 * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
1768 * It is definitely the dwarf stepping code, but until that is found it is very tedious to figure out
1769 * whether it is in any way conditional, so just emit this instruction for now and hope for the best... */
1770 /* pacibsp */
1771 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
1772# endif
1773
1774 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
1775 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
1776 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
1777 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
1778 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
1779 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
1780 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1781 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
1782 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1783 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
1784 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1785 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
1786 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1787 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
1788 /* Save the BP and LR (ret address) registers at the top of the frame. */
1789 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1790 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
1791 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1792 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
1793 pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(false /*fSub*/, ARMV8_A64_REG_BP,
1794 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
1795
1796 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
1797 pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
1798
1799 /* mov r28, r0 */
1800 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
1801
1802#else
1803# error "port me"
1804#endif
1805 return off;
1806}
1807
1808
1809/**
1810 * Recompiles the given threaded TB into a native one.
1811 *
1812 * In case of failure the translation block will be returned as-is.
1813 *
1814 * @returns pTb.
1815 * @param pVCpu The cross context virtual CPU structure of the calling
1816 * thread.
1817 * @param pTb The threaded translation block to recompile to native.
1818 */
1819PIEMTB iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
1820{
1821 /*
1822 * The first time through, we allocate the recompiler state; on subsequent
1823 * calls we just need to reset it before using it again.
1824 */
1825 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
1826 if (RT_LIKELY(pReNative))
1827 iemNativeReInit(pReNative);
1828 else
1829 {
1830 pReNative = iemNativeInit(pVCpu);
1831 AssertReturn(pReNative, pTb);
1832 }
1833
1834 /*
1835 * Emit prolog code (fixed).
1836 */
1837 uint32_t off = iemNativeEmitProlog(pReNative, 0);
1838 AssertReturn(off != UINT32_MAX, pTb);
1839
1840 /*
1841 * Convert the calls to native code.
1842 */
1843 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
1844 uint32_t cCallsLeft = pTb->Thrd.cCalls;
1845 while (cCallsLeft-- > 0)
1846 {
1847 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
1848 AssertReturn(off != UINT32_MAX, pTb);
1849
1850 pCallEntry++;
1851 }
1852
1853 /*
1854 * Emit the epilog code.
1855 */
1856 off = iemNativeEmitEpilog(pReNative, off);
1857 AssertReturn(off != UINT32_MAX, pTb);
1858
1859 /*
1860 * Make sure all labels have been defined.
1861 */
1862 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
1863#ifdef VBOX_STRICT
1864 uint32_t const cLabels = pReNative->cLabels;
1865 for (uint32_t i = 0; i < cLabels; i++)
1866 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
1867#endif
1868
1869 /*
1870 * Allocate executable memory, copy over the code we've generated.
1871 */
1872 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1873 if (pTbAllocator->pDelayedFreeHead)
1874 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
1875
1876 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
1877 AssertReturn(paFinalInstrBuf, pTb);
1878 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
1879
1880 /*
1881 * Apply fixups.
1882 */
1883 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
1884 uint32_t const cFixups = pReNative->cFixups;
1885 for (uint32_t i = 0; i < cFixups; i++)
1886 {
1887 Assert(paFixups[i].off < off);
1888 Assert(paFixups[i].idxLabel < cLabels);
1889 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
1890 switch (paFixups[i].enmType)
1891 {
1892#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1893 case kIemNativeFixupType_Rel32:
1894 Assert(paFixups[i].off + 4 <= off);
1895 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
1896 continue;
1897
1898#elif defined(RT_ARCH_ARM64)
1899 case kIemNativeFixupType_RelImm19At5:
1900 {
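 /* The signed 19-bit immediate of B.cond/CBZ/CBNZ style instructions sits in bits
    [23:5] and is counted in instructions, hence the 0xff00001f preserve mask and
    the +/-256K (i.e. +/-1MB in bytes) range assertion below. */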
1901 Assert(paFixups[i].off < off);
1902 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
1903 Assert(offDisp >= -262144 && offDisp < 262144);
1904 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (offDisp << 5);
1905 continue;
1906 }
1907#endif
1908 case kIemNativeFixupType_Invalid:
1909 case kIemNativeFixupType_End:
1910 break;
1911 }
1912 AssertFailed();
1913 }
1914
1915 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
1916
1917 /*
1918 * Convert the translation block.
1919 */
1920 //RT_BREAKPOINT();
1921 RTMemFree(pTb->Thrd.paCalls);
1922 pTb->Native.paInstructions = paFinalInstrBuf;
1923 pTb->Native.cInstructions = off;
1924 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
1925
1926 Assert(pTbAllocator->cThreadedTbs > 0);
1927 pTbAllocator->cThreadedTbs -= 1;
1928 pTbAllocator->cNativeTbs += 1;
1929 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
1930
1931 return pTb;
1932}
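
/*
 * Typical usage sketch (illustration only; the real call site lives elsewhere in the
 * threaded recompiler):
 *
 *     pTb = iemNativeRecompile(pVCpu, pTb);
 *     // On failure the threaded TB comes back unchanged, so the caller can simply
 *     // keep executing it via the threaded dispatcher.
 */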
1933