VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 101250

Last change on this file since 101250 was 101250, checked in by vboxsync, 19 months ago

VMM/IEM: Fixed EXC_BAD_INSTRUCTION issue. bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 73.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 101250 2023-09-25 00:54:00Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : ...
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): ...
18 * - Level 12 (Log12): ...
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/heap.h>
59#include <iprt/mem.h>
60#include <iprt/string.h>
61#if defined(RT_ARCH_AMD64)
62# include <iprt/x86.h>
63#elif defined(RT_ARCH_ARM64)
64# include <iprt/armv8.h>
65#endif
66
67#ifdef RT_OS_WINDOWS
68# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
70extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
71#else
72# include <iprt/formats/dwarf.h>
73# if defined(RT_OS_DARWIN)
74# include <libkern/OSCacheControl.h>
75# define IEMNATIVE_USE_LIBUNWIND
76extern "C" void __register_frame(const void *pvFde);
77extern "C" void __deregister_frame(const void *pvFde);
78# else
79extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
80extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
81# endif
82#endif
83
84#include "IEMInline.h"
85#include "IEMThreadedFunctions.h"
86#include "IEMN8veRecompiler.h"
87
88
89/*
90 * Narrow down configs here to avoid wasting time on unused configs.
91 * Note! Same checks in IEMAllThrdRecompiler.cpp.
92 */
93
94#ifndef IEM_WITH_CODE_TLB
95# error The code TLB must be enabled for the recompiler.
96#endif
97
98#ifndef IEM_WITH_DATA_TLB
99# error The data TLB must be enabled for the recompiler.
100#endif
101
102#ifndef IEM_WITH_SETJMP
103# error The setjmp approach must be enabled for the recompiler.
104#endif
105
106
107/*********************************************************************************************************************************
108* Executable Memory Allocator *
109*********************************************************************************************************************************/
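/*
 * Overview: executable memory is handed out from a small set of large chunks
 * allocated with RTMemPageAllocEx.  Each chunk is sub-allocated either via
 * RTHeapSimple or, with IEMEXECMEM_USE_ALT_SUB_ALLOCATOR, via an allocation
 * bitmap kept in the allocator structure itself, so the chunk pages never
 * need to be writable just for bookkeeping.
 */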
110/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
111 * Use an alternative chunk sub-allocator that does not store internal data
112 * in the chunk.
113 *
114 * Using RTHeapSimple is not practical on newer darwin systems where
115 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
116 * memory. We would have to change the protection of the whole chunk for
117 * every call to RTHeapSimple, which would be rather expensive.
118 *
119 * This alternative implementation lets us restrict page protection modifications
120 * to the pages backing the executable memory we just allocated.
121 */
122#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
123/** The chunk sub-allocation unit size in bytes. */
124#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
125/** The chunk sub-allocation unit size as a shift factor. */
126#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
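/* Example: with 128 byte units a 300 byte request maps to (300 + 127) >> 7 = 3 units,
   i.e. 384 bytes of chunk space. */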
127
128#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
129/**
130 * Per-chunk unwind info for non-windows hosts.
131 */
132typedef struct IEMEXECMEMCHUNKEHFRAME
133{
134# ifdef IEMNATIVE_USE_LIBUNWIND
135 /** The offset of the FDA into abEhFrame. */
136 uintptr_t offFda;
137# else
138 /** struct object storage area. */
139 uint8_t abObject[1024];
140# endif
141 /** The dwarf ehframe data for the chunk. */
142 uint8_t abEhFrame[512];
143} IEMEXECMEMCHUNKEHFRAME;
144/** Pointer to per-chunk info for non-windows hosts. */
145typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
146#endif
147
148
149/**
150 * A chunk of executable memory.
151 */
152typedef struct IEMEXECMEMCHUNK
153{
154#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 /** Number of free items in this chunk. */
156 uint32_t cFreeUnits;
157 /** Hint where to start searching for free space in the allocation bitmap. */
158 uint32_t idxFreeHint;
159#else
160 /** The heap handle. */
161 RTHEAPSIMPLE hHeap;
162#endif
163 /** Pointer to the chunk. */
164 void *pvChunk;
165#ifdef IN_RING3
166 /**
167 * Pointer to the unwind information.
168 *
169 * This is used during C++ throw and longjmp (windows and probably most other
170 * platforms). Some debuggers (windbg) make use of it as well.
171 *
172 * Windows: This is allocated from hHeap on windows because (at least for
173 * AMD64) the UNWIND_INFO structure address in the
174 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
175 *
176 * Others: Allocated from the regular heap to avoid unnecessary executable data
177 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
178 void *pvUnwindInfo;
179#elif defined(IN_RING0)
180 /** Allocation handle. */
181 RTR0MEMOBJ hMemObj;
182#endif
183} IEMEXECMEMCHUNK;
184/** Pointer to a memory chunk. */
185typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
186
187
188/**
189 * Executable memory allocator for the native recompiler.
190 */
191typedef struct IEMEXECMEMALLOCATOR
192{
193 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
194 uint32_t uMagic;
195
196 /** The chunk size. */
197 uint32_t cbChunk;
198 /** The maximum number of chunks. */
199 uint32_t cMaxChunks;
200 /** The current number of chunks. */
201 uint32_t cChunks;
202 /** Hint where to start looking for available memory. */
203 uint32_t idxChunkHint;
204 /** Statistics: Current number of allocations. */
205 uint32_t cAllocations;
206
207 /** The total amount of memory available. */
208 uint64_t cbTotal;
209 /** Total amount of free memory. */
210 uint64_t cbFree;
211 /** Total amount of memory allocated. */
212 uint64_t cbAllocated;
213
214#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
215 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
216 *
217 * Since the chunk size is a power of two and the minimum chunk size is a lot
218 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
219 * require a whole number of uint64_t elements in the allocation bitmap. So,
220 * for the sake of simplicity/laziness, they are allocated as one continuous
221 * chunk. */
222 uint64_t *pbmAlloc;
223 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
224 uint32_t cUnitsPerChunk;
225 /** Number of bitmap elements per chunk (for quickly locating the bitmap
226 * portion corresponding to a chunk). */
227 uint32_t cBitmapElementsPerChunk;
228#else
229 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
230 * @{ */
231 /** The size of the heap internal block header. This is used to adjust the
232 * request memory size to make sure there is exactly enough room for a header at
233 * the end of the blocks we allocate before the next 64 byte alignment line. */
234 uint32_t cbHeapBlockHdr;
235 /** The size of the initial heap allocation required to make sure the first
236 * allocation is correctly aligned. */
237 uint32_t cbHeapAlignTweak;
238 /** The alignment tweak allocation address. */
239 void *pvAlignTweak;
240 /** @} */
241#endif
242
243#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
244 /** Pointer to the array of unwind info running parallel to aChunks (same
245 * allocation as this structure, located after the bitmaps).
246 * (For Windows, the structures must reside in 32-bit RVA distance to the
247 * actual chunk, so they are allocated off the chunk.) */
248 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
249#endif
250
251 /** The allocation chunks. */
252 RT_FLEXIBLE_ARRAY_EXTENSION
253 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
254} IEMEXECMEMALLOCATOR;
255/** Pointer to an executable memory allocator. */
256typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
257
258/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
259#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
260
261
262static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator);
263
264
265/**
266 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
267 * the heap statistics.
268 */
269static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
270 uint32_t cbReq, uint32_t idxChunk)
271{
272 pExecMemAllocator->cAllocations += 1;
273 pExecMemAllocator->cbAllocated += cbReq;
274#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
275 pExecMemAllocator->cbFree -= cbReq;
276#else
277 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
278#endif
279 pExecMemAllocator->idxChunkHint = idxChunk;
280
281#ifdef RT_OS_DARWIN
282 /*
283 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
284 * on darwin. So, we mark the pages returned as read+write after alloc and
285 * expect the caller to call iemExecMemAllocatorReadyForUse when done
286 * writing to the allocation.
287 *
288 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
289 * for details.
290 */
291 /** @todo detect if this is necessary... it wasn't required on 10.15 or
292 * whatever older version it was. */
293 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
294 AssertRC(rc);
295#endif
296
297 return pvRet;
298}
299
300
301#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
302static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
303 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
304{
305 /*
306 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
307 */
308 Assert(!(cToScan & 63));
309 Assert(!(idxFirst & 63));
310 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
311 pbmAlloc += idxFirst / 64;
312
313 /*
314 * Scan the bitmap for cReqUnits of consecutive clear bits
315 */
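 /* Simple first-fit: locate the first clear bit, check that the following
    cReqUnits - 1 bits are clear as well, and otherwise resume the scan at the
    next clear bit past the set bit that broke the run. */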
316 /** @todo This can probably be done more efficiently for non-x86 systems. */
317 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
318 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
319 {
320 uint32_t idxAddBit = 1;
321 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
322 idxAddBit++;
323 if (idxAddBit >= cReqUnits)
324 {
325 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
326
327 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
328 pChunk->cFreeUnits -= cReqUnits;
329 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
330
331 void * const pvRet = (uint8_t *)pChunk->pvChunk
332 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
333
334 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
335 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
336 }
337
338 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
339 }
340 return NULL;
341}
342#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
343
344
345static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
346{
347#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
348 /*
349 * Figure out how much to allocate.
350 */
351 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
352 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
353 {
354 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
355 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
356 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
357 {
358 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
359 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
360 if (pvRet)
361 return pvRet;
362 }
363 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
364 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
365 cReqUnits, idxChunk);
366 }
367#else
368 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
369 if (pvRet)
370 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
371#endif
372 return NULL;
373
374}
375
376
377/**
378 * Allocates @a cbReq bytes of executable memory.
379 *
380 * @returns Pointer to the memory, NULL if out of memory or other problem
381 * encountered.
382 * @param pVCpu The cross context virtual CPU structure of the calling
383 * thread.
384 * @param cbReq How many bytes are required.
385 */
386static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
387{
388 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
389 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
390 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
391
392 /*
393 * Adjust the request size so it'll fit the allocator alignment/whatnot.
394 *
395 * For the RTHeapSimple allocator this means following the logic described
396 * in iemExecMemAllocatorGrow and attempting the allocation from one of the
397 * existing chunks if we think we've got sufficient free memory around.
398 *
399 * For the alternative allocator we just align the request up to a whole unit size.
400 */
401#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
402 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
403#else
404 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
405#endif
406 if (cbReq <= pExecMemAllocator->cbFree)
407 {
408 uint32_t const cChunks = pExecMemAllocator->cChunks;
409 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
410 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
411 {
412 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
413 if (pvRet)
414 return pvRet;
415 }
416 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
417 {
418 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
419 if (pvRet)
420 return pvRet;
421 }
422 }
423
424 /*
425 * Can we grow it with another chunk?
426 */
427 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
428 {
429 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
430 AssertLogRelRCReturn(rc, NULL);
431
432 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
433 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
434 if (pvRet)
435 return pvRet;
436 AssertFailed();
437 }
438
439 /* What now? Prune native translation blocks from the cache? */
440 AssertFailed();
441 return NULL;
442}
443
444
445/** This is a hook that we may need later for changing memory protection back
446 * to readonly+exec */
447static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
448{
449#ifdef RT_OS_DARWIN
450 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
451 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
452 AssertRC(rc); RT_NOREF(pVCpu);
453
454 /*
455 * Flush the instruction cache:
456 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
457 */
458 /* sys_dcache_flush(pv, cb); - not necessary */
459 sys_icache_invalidate(pv, cb);
460#else
461 RT_NOREF(pVCpu, pv, cb);
462#endif
463}
464
465
466/**
467 * Frees executable memory.
468 */
469void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
470{
471 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
472 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
473 Assert(pv);
474#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
475 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
476#else
477 Assert(!((uintptr_t)pv & 63));
478#endif
479
480 /* Align the size as we did when allocating the block. */
481#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
482 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
483#else
484 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
485#endif
486
487 /* Free it / assert sanity. */
488#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
489 uint32_t const cChunks = pExecMemAllocator->cChunks;
490 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
491 bool fFound = false;
492 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
493 {
494 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
495 fFound = offChunk < cbChunk;
496 if (fFound)
497 {
498#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
499 uint32_t const idxFirst = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
500 uint32_t const cReqUnits = cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
501
502 /* Check that it's valid and free it. */
503 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
504 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
505 for (uint32_t i = 1; i < cReqUnits; i++)
506 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
507 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
508
509 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
510 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
511
512 /* Update the stats. */
513 pExecMemAllocator->cbAllocated -= cb;
514 pExecMemAllocator->cbFree += cb;
515 pExecMemAllocator->cAllocations -= 1;
516 return;
517#else
518 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
519 break;
520#endif
521 }
522 }
523# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
524 AssertFailed();
525# else
526 Assert(fFound);
527# endif
528#endif
529
530#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
531 /* Update stats while cb is freshly calculated. */
532 pExecMemAllocator->cbAllocated -= cb;
533 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
534 pExecMemAllocator->cAllocations -= 1;
535
536 /* Free it. */
537 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
538#endif
539}
540
541
542
543#ifdef IN_RING3
544# ifdef RT_OS_WINDOWS
545
546/**
547 * Initializes the unwind info structures for windows hosts.
548 */
549static int
550iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
551{
552 /*
553 * The AMD64 unwind opcodes.
554 *
555 * This is a program that starts with RSP after a RET instruction that
556 * ends up in recompiled code, and the operations we describe here will
557 * restore all non-volatile registers and bring RSP back to where our
558 * RET address is. This means it's reverse order from what happens in
559 * the prologue.
560 *
561 * Note! Using a frame register approach here, both because we have one
562 * and mainly because the UWOP_ALLOC_LARGE argument values
563 * would be a pain to write initializers for. On the positive
564 * side, we're impervious to changes in the stack variable
565 * area and can deal with dynamic stack allocations if necessary.
566 */
567 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
568 {
569 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
570 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
571 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
572 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
573 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
574 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
575 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
576 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
577 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
578 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
579 };
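 /* Note: the first field of each entry is the prologue offset at which the
    described action has taken effect; the Windows unwinder expects the
    entries ordered by descending offset (last prologue action first). */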
580 union
581 {
582 IMAGE_UNWIND_INFO Info;
583 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
584 } s_UnwindInfo =
585 {
586 {
587 /* .Version = */ 1,
588 /* .Flags = */ 0,
589 /* .SizeOfProlog = */ 16, /* whatever */
590 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
591 /* .FrameRegister = */ X86_GREG_xBP,
592 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
593 }
594 };
595 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
596 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
597
598 /*
599 * Calc how much space we need and allocate it off the exec heap.
600 */
601 unsigned const cFunctionEntries = 1;
602 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
603 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
604# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
606 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
607 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
608# else
609 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
610 - pExecMemAllocator->cbHeapBlockHdr;
611 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
612 32 /*cbAlignment*/);
613# endif
614 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
615 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
616
617 /*
618 * Initialize the structures.
619 */
620 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
621
622 paFunctions[0].BeginAddress = 0;
623 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
624 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
625
626 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
627 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
628
629 /*
630 * Register it.
631 */
632 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
633 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
634
635 return VINF_SUCCESS;
636}
637
638
639# else /* !RT_OS_WINDOWS */
640
641/**
642 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
643 */
644DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
645{
646 if (iValue >= 64)
647 {
648 Assert(iValue < 0x2000);
649 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
650 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
651 }
652 else if (iValue >= 0)
653 *Ptr.pb++ = (uint8_t)iValue;
654 else if (iValue > -64)
655 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
656 else
657 {
658 Assert(iValue > -0x2000);
659 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
660 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
661 }
662 return Ptr;
663}
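/* Example encodings: 1 -> 0x01, -8 -> 0x78, 300 -> 0xac 0x02 (low 7 bits first,
   continuation bit 0x80 set on all but the last byte). */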
664
665
666/**
667 * Emits an ULEB128 encoded value (up to 64-bit wide).
668 */
669DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
670{
671 while (uValue >= 0x80)
672 {
673 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
674 uValue >>= 7;
675 }
676 *Ptr.pb++ = (uint8_t)uValue;
677 return Ptr;
678}
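/* Example encodings: 0x7f -> 0x7f, 0x80 -> 0x80 0x01, 624485 -> 0xe5 0x8e 0x26. */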
679
680
681/**
682 * Emits a CFA rule as register @a uReg + offset @a off.
683 */
684DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
685{
686 *Ptr.pb++ = DW_CFA_def_cfa;
687 Ptr = iemDwarfPutUleb128(Ptr, uReg);
688 Ptr = iemDwarfPutUleb128(Ptr, off);
689 return Ptr;
690}
691
692
693/**
694 * Emits a register (@a uReg) save location:
695 * CFA + @a off * data_alignment_factor
696 */
697DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
698{
699 if (uReg < 0x40)
700 *Ptr.pb++ = DW_CFA_offset | uReg;
701 else
702 {
703 *Ptr.pb++ = DW_CFA_offset_extended;
704 Ptr = iemDwarfPutUleb128(Ptr, uReg);
705 }
706 Ptr = iemDwarfPutUleb128(Ptr, off);
707 return Ptr;
708}
709
710
711# if 0 /* unused */
712/**
713 * Emits a register (@a uReg) save location, using signed offset:
714 * CFA + @a offSigned * data_alignment_factor
715 */
716DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
717{
718 *Ptr.pb++ = DW_CFA_offset_extended_sf;
719 Ptr = iemDwarfPutUleb128(Ptr, uReg);
720 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
721 return Ptr;
722}
723# endif
724
725
726/**
727 * Initializes the unwind info section for non-windows hosts.
728 */
729static int
730iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
731{
732 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
733 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
734
735 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
736
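 /*
  * What follows hand-assembles a minimal .eh_frame image into abEhFrame: a
  * length-prefixed CIE describing the fixed prologue (frame pointer based CFA,
  * return address and callee saved registers at fixed CFA offsets), a single
  * length-prefixed FDE covering the whole chunk, and a terminator entry.
  */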
737 /*
738 * Generate the CIE first.
739 */
740# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
741 uint8_t const iDwarfVer = 3;
742# else
743 uint8_t const iDwarfVer = 4;
744# endif
745 RTPTRUNION const PtrCie = Ptr;
746 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
747 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
748 *Ptr.pb++ = iDwarfVer; /* DWARF version */
749 *Ptr.pb++ = 0; /* Augmentation. */
750 if (iDwarfVer >= 4)
751 {
752 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
753 *Ptr.pb++ = 0; /* Segment selector size. */
754 }
755 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
756 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
757# ifdef RT_ARCH_AMD64
758 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
759# elif defined(RT_ARCH_ARM64)
760 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_PC); /* Return address column (ULEB128) */
761# else
762# error "port me"
763# endif
764 /* Initial instructions: */
765# ifdef RT_ARCH_AMD64
766 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
767 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
768 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
769 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
770 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
771 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
772 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
773 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
774# elif defined(RT_ARCH_ARM64)
775 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 0); /* CFA = BP + 0x00 - first stack parameter */
776 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_PC, 1); /* Ret PC = [CFA + 1*-8] */
777 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
778# endif
779 while ((Ptr.u - PtrCie.u) & 3)
780 *Ptr.pb++ = DW_CFA_nop;
781 /* Finalize the CIE size. */
782 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
783
784 /*
785 * Generate an FDE for the whole chunk area.
786 */
787# ifdef IEMNATIVE_USE_LIBUNWIND
788 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
789# endif
790 RTPTRUNION const PtrFde = Ptr;
791 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
792 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
793 Ptr.pu32++;
794 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
795 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
796 //*Ptr.pb++ = DW_CFA_nop; - not required for recent libgcc/glibc.
797 while ((Ptr.u - PtrFde.u) & 3)
798 *Ptr.pb++ = DW_CFA_nop;
799 /* Finalize the FDE size. */
800 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
801
802 /* Terminator entry. */
803 *Ptr.pu32++ = 0;
804 *Ptr.pu32++ = 0; /* just to be sure... */
805 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
806
807 /*
808 * Register it.
809 */
810# ifdef IEMNATIVE_USE_LIBUNWIND
811 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
812# else
813 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
814 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
815# endif
816
817 return VINF_SUCCESS;
818}
819
820# endif /* !RT_OS_WINDOWS */
821#endif /* IN_RING3 */
822
823
824/**
825 * Adds another chunk to the executable memory allocator.
826 *
827 * This is used by the init code for the initial allocation and later by the
828 * regular allocator function when it's out of memory.
829 */
830static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator)
831{
832 /* Check that we've room for growth. */
833 uint32_t const idxChunk = pExecMemAllocator->cChunks;
834 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
835
836 /* Allocate a chunk. */
837#ifdef RT_OS_DARWIN
838 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
839#else
840 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
841#endif
842 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
843
844#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
845 int rc = VINF_SUCCESS;
846#else
847 /* Initialize the heap for the chunk. */
848 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
849 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
850 AssertRC(rc);
851 if (RT_SUCCESS(rc))
852 {
853 /*
854 * We want the memory to be aligned on 64 byte, so the first time thru
855 * here we do some exploratory allocations to see how we can achieve this.
856 * On subsequent runs we only make an initial adjustment allocation, if
857 * necessary.
858 *
859 * Since we own the heap implementation, we know that the internal block
860 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
861 * so all we need to do wrt allocation size adjustments is to add 32 bytes
862 * to the size, align up by 64 bytes, and subtract 32 bytes.
863 *
864 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
865 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
866 * allocation to force subsequent allocations to return 64 byte aligned
867 * user areas.
868 */
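 /* E.g. a 100 byte request becomes RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes,
    so block header (32) + user area (160) = 192, a whole number of 64 byte
    lines, which keeps the next block's user area 64 byte aligned as well. */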
869 if (!pExecMemAllocator->cbHeapBlockHdr)
870 {
871 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
872 pExecMemAllocator->cbHeapAlignTweak = 64;
873 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
874 32 /*cbAlignment*/);
875 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
876
877 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
878 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
879 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
880 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
881 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
882
883 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
884 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
885 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
886 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
887 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
888
889 RTHeapSimpleFree(hHeap, pvTest2);
890 RTHeapSimpleFree(hHeap, pvTest1);
891 }
892 else
893 {
894 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
895 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
896 }
897 if (RT_SUCCESS(rc))
898#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
899 {
900 /*
901 * Add the chunk.
902 *
903 * This must be done before the unwind init so windows can allocate
904 * memory from the chunk when using the alternative sub-allocator.
905 */
906 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
907#ifdef IN_RING3
908 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
909#endif
910#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
911 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
912#else
913 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
914 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
915 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
916 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
917#endif
918
919 pExecMemAllocator->cChunks = idxChunk + 1;
920 pExecMemAllocator->idxChunkHint = idxChunk;
921
922#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
923 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
924 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
925#else
926 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
927 pExecMemAllocator->cbTotal += cbFree;
928 pExecMemAllocator->cbFree += cbFree;
929#endif
930
931#ifdef IN_RING3
932 /*
933 * Initialize the unwind information (this cannot really fail atm).
934 * (This sets pvUnwindInfo.)
935 */
936 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, pvChunk, idxChunk);
937 if (RT_SUCCESS(rc))
938#endif
939 {
940 return VINF_SUCCESS;
941 }
942
943#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
944 /* Just in case the impossible happens, undo the above: */
945 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
946 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
947 pExecMemAllocator->cChunks = idxChunk;
948 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
949 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
950 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
951 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
952#endif
953 }
954#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
955 }
956#endif
957 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
958 return rc;
959}
960
961
962/**
963 * Initializes the executable memory allocator for native recompilation on the
964 * calling EMT.
965 *
966 * @returns VBox status code.
967 * @param pVCpu The cross context virtual CPU structure of the calling
968 * thread.
969 * @param cbMax The max size of the allocator.
970 * @param cbInitial The initial allocator size.
971 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
972 * dependent).
973 */
974int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
975{
976 /*
977 * Validate input.
978 */
979 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
980 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
981 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
982 || cbChunk == 0
983 || ( RT_IS_POWER_OF_TWO(cbChunk)
984 && cbChunk >= _1M
985 && cbChunk <= _256M
986 && cbChunk <= cbMax),
987 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
988 VERR_OUT_OF_RANGE);
989
990 /*
991 * Adjust/figure out the chunk size.
992 */
993 if (cbChunk == 0 || cbChunk == UINT32_MAX)
994 {
995 if (cbMax >= _256M)
996 cbChunk = _64M;
997 else
998 {
999 if (cbMax < _16M)
1000 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1001 else
1002 cbChunk = (uint32_t)cbMax / 4;
1003 if (!RT_IS_POWER_OF_TWO(cbChunk))
1004 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1005 }
1006 }
1007
1008 if (cbChunk > cbMax)
1009 cbMax = cbChunk;
1010 else
1011 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1012 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1013 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
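 /* E.g. cbMax = 128 MiB gives cbChunk = 32 MiB (cbMax / 4, already a power of two)
    and cMaxChunks = 4, while any cbMax >= 256 MiB uses 64 MiB chunks. */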
1014
1015 /*
1016 * Allocate and initialize the allocator instance.
1017 */
1018 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1019#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1020 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1021 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1022 cbNeeded += cbBitmap * cMaxChunks;
1023 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1024 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
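 /* E.g. a 64 MiB chunk with 128 byte units needs a 512K bit (64 KiB) bitmap per chunk. */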
1025#endif
1026#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1027 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1028 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1029#endif
1030 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1031 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1032 VERR_NO_MEMORY);
1033 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1034 pExecMemAllocator->cbChunk = cbChunk;
1035 pExecMemAllocator->cMaxChunks = cMaxChunks;
1036 pExecMemAllocator->cChunks = 0;
1037 pExecMemAllocator->idxChunkHint = 0;
1038 pExecMemAllocator->cAllocations = 0;
1039 pExecMemAllocator->cbTotal = 0;
1040 pExecMemAllocator->cbFree = 0;
1041 pExecMemAllocator->cbAllocated = 0;
1042#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1043 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1044 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1045 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1046 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1047#endif
1048#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1049 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1050#endif
1051 for (uint32_t i = 0; i < cMaxChunks; i++)
1052 {
1053#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1054 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1055 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1056#else
1057 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1058#endif
1059 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1060#ifdef IN_RING0
1061 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1062#else
1063 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1064#endif
1065 }
1066 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1067
1068 /*
1069 * Do the initial allocations.
1070 */
1071 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1072 {
1073 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
1074 AssertLogRelRCReturn(rc, rc);
1075 }
1076
1077 pExecMemAllocator->idxChunkHint = 0;
1078
1079 return VINF_SUCCESS;
1080}
1081
1082
1083/*********************************************************************************************************************************
1084* Native Recompilation *
1085*********************************************************************************************************************************/
1086
1087
1088/**
1089 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1090 */
1091IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1092{
1093 pVCpu->iem.s.cInstructions += idxInstr;
1094 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1095}
1096
1097
1098/**
1099 * Reinitializes the native recompiler state.
1100 *
1101 * Called before starting a new recompile job.
1102 */
1103static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative)
1104{
1105 pReNative->cLabels = 0;
1106 pReNative->cFixups = 0;
1107 return pReNative;
1108}
1109
1110
1111/**
1112 * Allocates and initializes the native recompiler state.
1113 *
1114 * This is called the first time an EMT wants to recompile something.
1115 *
1116 * @returns Pointer to the new recompiler state.
1117 * @param pVCpu The cross context virtual CPU structure of the calling
1118 * thread.
1119 * @thread EMT(pVCpu)
1120 */
1121static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu)
1122{
1123 VMCPU_ASSERT_EMT(pVCpu);
1124
1125 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1126 AssertReturn(pReNative, NULL);
1127
1128 /*
1129 * Try allocate all the buffers and stuff we need.
1130 */
1131 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1132 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1133 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1134 if (RT_LIKELY( pReNative->pInstrBuf
1135 && pReNative->paLabels
1136 && pReNative->paFixups))
1137 {
1138 /*
1139 * Set the buffer & array sizes on success.
1140 */
1141 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1142 pReNative->cLabelsAlloc = _8K;
1143 pReNative->cFixupsAlloc = _16K;
1144
1145 /*
1146 * Done, just need to save it and reinit it.
1147 */
1148 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1149 return iemNativeReInit(pReNative);
1150 }
1151
1152 /*
1153 * Failed. Cleanup and return.
1154 */
1155 AssertFailed();
1156 RTMemFree(pReNative->pInstrBuf);
1157 RTMemFree(pReNative->paLabels);
1158 RTMemFree(pReNative->paFixups);
1159 RTMemFree(pReNative);
1160 return NULL;
1161}
1162
1163
1164/**
1165 * Defines a label.
1166 *
1167 * @returns Label ID.
1168 * @param pReNative The native recompile state.
1169 * @param enmType The label type.
1170 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1171 * label is not yet defined (default).
1172 * @param uData Data associated with the label. Only applicable to
1173 * certain types of labels. Default is zero.
1174 */
1175DECLHIDDEN(uint32_t) iemNativeMakeLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1176 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
1177{
1178 /*
1179 * Do we have the label already?
1180 */
1181 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1182 uint32_t const cLabels = pReNative->cLabels;
1183 for (uint32_t i = 0; i < cLabels; i++)
1184 if ( paLabels[i].enmType == enmType
1185 && paLabels[i].uData == uData)
1186 {
1187 if (paLabels[i].off == offWhere || offWhere == UINT32_MAX)
1188 return i;
1189 if (paLabels[i].off == UINT32_MAX)
1190 {
1191 paLabels[i].off = offWhere;
1192 return i;
1193 }
1194 }
1195
1196 /*
1197 * Make sure we've got room for another label.
1198 */
1199 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1200 { /* likely */ }
1201 else
1202 {
1203 uint32_t cNew = pReNative->cLabelsAlloc;
1204 AssertReturn(cNew, UINT32_MAX);
1205 AssertReturn(cLabels == cNew, UINT32_MAX);
1206 cNew *= 2;
1207 AssertReturn(cNew <= _64K, UINT32_MAX); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1208 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1209 AssertReturn(paLabels, UINT32_MAX);
1210 pReNative->paLabels = paLabels;
1211 pReNative->cLabelsAlloc = cNew;
1212 }
1213
1214 /*
1215 * Define a new label.
1216 */
1217 paLabels[cLabels].off = offWhere;
1218 paLabels[cLabels].enmType = enmType;
1219 paLabels[cLabels].uData = uData;
1220 pReNative->cLabels = cLabels + 1;
1221 return cLabels;
1222}
1223
1224
1225/**
1226 * Looks up a label.
1227 *
1228 * @returns Label ID if found, UINT32_MAX if not.
1229 */
1230static uint32_t iemNativeFindLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1231 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1232{
1233 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1234 uint32_t const cLabels = pReNative->cLabels;
1235 for (uint32_t i = 0; i < cLabels; i++)
1236 if ( paLabels[i].enmType == enmType
1237 && paLabels[i].uData == uData
1238 && ( paLabels[i].off == offWhere
1239 || offWhere == UINT32_MAX
1240 || paLabels[i].off == UINT32_MAX))
1241 return i;
1242 return UINT32_MAX;
1243}
1244
1245
1246
1247/**
1248 * Adds a fixup.
1249 *
1250 * @returns Success indicator.
1251 * @param pReNative The native recompile state.
1252 * @param offWhere The instruction offset of the fixup location.
1253 * @param idxLabel The target label ID for the fixup.
1254 * @param enmType The fixup type.
1255 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1256 */
1257DECLHIDDEN(bool) iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1258 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/) RT_NOEXCEPT
1259{
1260 Assert(idxLabel <= UINT16_MAX);
1261 Assert((unsigned)enmType <= UINT8_MAX);
1262
1263 /*
1264 * Make sure we've room.
1265 */
1266 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1267 uint32_t const cFixups = pReNative->cFixups;
1268 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1269 { /* likely */ }
1270 else
1271 {
1272 uint32_t cNew = pReNative->cFixupsAlloc;
1273 AssertReturn(cNew, false);
1274 AssertReturn(cFixups == cNew, false);
1275 cNew *= 2;
1276 AssertReturn(cNew <= _128K, false);
1277 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1278 AssertReturn(paFixups, false);
1279 pReNative->paFixups = paFixups;
1280 pReNative->cFixupsAlloc = cNew;
1281 }
1282
1283 /*
1284 * Add the fixup.
1285 */
1286 paFixups[cFixups].off = offWhere;
1287 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1288 paFixups[cFixups].enmType = enmType;
1289 paFixups[cFixups].offAddend = offAddend;
1290 pReNative->cFixups = cFixups + 1;
1291 return true;
1292}
1293
1294/**
1295 * Slow code path for iemNativeInstrBufEnsure.
1296 */
1297DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1298 uint32_t cInstrReq) RT_NOEXCEPT
1299{
1300 /* Double the buffer size till we meet the request. */
1301 uint32_t cNew = pReNative->cInstrBufAlloc;
1302 AssertReturn(cNew > 0, NULL);
1303 do
1304 cNew *= 2;
1305 while (cNew < off + cInstrReq);
1306
1307 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1308 AssertReturn(cbNew <= _2M, NULL);
1309
1310 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1311 AssertReturn(pvNew, NULL);
1312
1313 pReNative->cInstrBufAlloc = cNew;
1314 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1315}
1316
1317
1318/**
1319 * Emits code for checking the return code of a call and rcPassUp, returning
1320 * from the code if either is non-zero.
1321 */
1322DECLHIDDEN(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1323 uint8_t idxInstr) RT_NOEXCEPT
1324{
1325#ifdef RT_ARCH_AMD64
1326 /*
1327 * AMD64: eax = call status code.
1328 */
1329
1330 /* edx = rcPassUp */
1331 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
1332 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1333
1334 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1335 AssertReturn(pbCodeBuf, UINT32_MAX);
1336
1337 /* edx = eax | rcPassUp*/
1338 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
1339 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
1340
1341 /* Jump to non-zero status return path, loading cl with the instruction number. */
1342 pbCodeBuf[off++] = 0xb0 + X86_GREG_xCX; /* mov cl, imm8 (pCallEntry->idxInstr) */
1343 pbCodeBuf[off++] = idxInstr;
1344
1345 pbCodeBuf[off++] = 0x0f; /* jnz rel32 */
1346 pbCodeBuf[off++] = 0x85;
1347 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
1348 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
1349 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4), UINT32_MAX);
1350 pbCodeBuf[off++] = 0x00;
1351 pbCodeBuf[off++] = 0x00;
1352 pbCodeBuf[off++] = 0x00;
1353 pbCodeBuf[off++] = 0x00;
1354
1355 /* done. */
1356
1357#elif RT_ARCH_ARM64
1358 /*
1359 * ARM64: w0 = call status code.
1360 */
1361 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
1362 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
1363
1364 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1365 AssertReturn(pu32CodeBuf, UINT32_MAX);
1366
1367 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
1368
1369 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
1370 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
1371 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5), UINT32_MAX);
1372 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
1373
1374#else
1375# error "port me"
1376#endif
1377 return off;
1378}
1379
1380
1381/**
1382 * Emits a call to a threaded worker function.
1383 */
1384static int32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
1385{
1386#ifdef VBOX_STRICT
1387 off = iemNativeEmitMarker(pReNative, off);
1388 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1389#endif
1390 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
1391
1392#ifdef RT_ARCH_AMD64
1393 /* Load the parameters and emit the call. */
1394# ifdef RT_OS_WINDOWS
1395# ifndef VBOXSTRICTRC_STRICT_ENABLED
1396 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
1397 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1398 if (cParams > 0)
1399 {
1400 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
1401 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1402 }
1403 if (cParams > 1)
1404 {
1405 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
1406 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1407 }
1408 if (cParams > 2)
1409 {
1410 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
1411 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1412 }
1413# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
1414 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
1415 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1416 if (cParams > 0)
1417 {
1418 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
1419 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1420 }
1421 if (cParams > 1)
1422 {
1423 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
1424 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1425 }
1426 if (cParams > 2)
1427 {
1428 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
1429 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1430 }
1431 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
1432 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1433 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
1434 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1435# endif /* VBOXSTRICTRC_STRICT_ENABLED */
1436# else
1437 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
1438 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1439 if (cParams > 0)
1440 {
1441 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
1442 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1443 }
1444 if (cParams > 1)
1445 {
1446 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
1447 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1448 }
1449 if (cParams > 2)
1450 {
1451 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
1452 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1453 }
1454# endif
1455 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
1456 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1457
1458 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1459 AssertReturn(pbCodeBuf, UINT32_MAX);
1460 pbCodeBuf[off++] = 0xff; /* call rax */
1461 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
1462
1463# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
1464 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
1465# endif
1466
1467#elif RT_ARCH_ARM64
1468 /*
1469 * ARM64:
1470 */
1471 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1472 if (cParams > 0)
1473 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
1474 if (cParams > 1)
1475 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
1476 if (cParams > 2)
1477 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
1478 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0,
1479 (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
1480
1481 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1482 AssertReturn(pu32CodeBuf, UINT32_MAX);
1483
1484 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
1485
1486#else
1487# error "port me"
1488#endif
1489
1490 /*
1491 * Check the status code.
1492 */
1493 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
1494 AssertReturn(off != UINT32_MAX, off);
1495
1496 return off;
1497}
1498
1499
1500/**
1501 * Emits the rc + rcPassUp fiddling code for the NonZeroRetOrPassUp label (if used).
1502 */
1503static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
1504{
1505 /*
1506 * Generate the rc + rcPassUp fiddling code if needed.
1507 */
1508 uint32_t idxLabel = iemNativeFindLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
1509 if (idxLabel != UINT32_MAX)
1510 {
1511 Assert(pReNative->paLabels[idxLabel].off == UINT32_MAX);
1512 pReNative->paLabels[idxLabel].off = off;
1513
1514 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
1515#ifdef RT_ARCH_AMD64
1516 /*
1517 * AMD64:
1518 */
1519 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
1520 AssertReturn(pbCodeBuf, UINT32_MAX);
1521
1522 /* Call helper and jump to return point. */
1523# ifdef RT_OS_WINDOWS
1524 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
1525 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1526 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
1527 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1528 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
1529 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1530# else
1531 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
1532 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1533 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
1534 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1535 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
1536 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1537# endif
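        /* Either way, the argument registers now hold (pVCpu, rc, idxInstr) as
           expected by iemNativeHlpExecStatusCodeFiddling for the host ABI. */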
1538 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
1539 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1540
1541 pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1542 AssertReturn(pbCodeBuf, UINT32_MAX);
1543 pbCodeBuf[off++] = 0xff; /* call rax */
1544 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
1545
1546 /* Jump to common return point. */
1547 uint32_t offRel = pReNative->paLabels[idxReturnLabel].off - (off + 2);
1548 if (-(int32_t)offRel <= 127)
1549 {
1550 pbCodeBuf[off++] = 0xeb; /* jmp rel8 */
1551 pbCodeBuf[off++] = (uint8_t)offRel;
1552 off++;
1553 }
1554 else
1555 {
1556 offRel -= 3;
1557 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */
1558 pbCodeBuf[off++] = RT_BYTE1(offRel);
1559 pbCodeBuf[off++] = RT_BYTE2(offRel);
1560 pbCodeBuf[off++] = RT_BYTE3(offRel);
1561 pbCodeBuf[off++] = RT_BYTE4(offRel);
1562 }
1563 pbCodeBuf[off++] = 0xcc; /* int3 poison */
1564
1565#elif RT_ARCH_ARM64
1566 /*
1567 * ARM64:
1568 */
1569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
1570 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1571 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1572 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1573 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
1574 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
1575 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1576
1577 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1578 AssertReturn(pu32CodeBuf, UINT32_MAX);
1579 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
1580
1581 /* Jump back to the common return point. */
1582 int32_t const offRel = pReNative->paLabels[idxReturnLabel].off - off;
1583 pu32CodeBuf[off++] = Armv8A64MkInstrB(offRel);
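        /* Note: offRel is a difference of instruction-buffer indices, i.e. the
           displacement in 32-bit instruction units; Armv8A64MkInstrB presumably
           expects exactly that, as the A64 B imm26 field is scaled by 4. */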
1584#else
1585# error "port me"
1586#endif
1587 }
1588 return off;
1589}
1590
1591
1592/**
1593 * Emits a standard epilog.
1594 */
1595static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1596{
1597 /*
1598 * Successful return, so clear the return register (eax, w0).
1599 */
1600 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
1601 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1602
1603 /*
1604 * Define label for common return point.
1605 */
1606 uint32_t const idxReturn = iemNativeMakeLabel(pReNative, kIemNativeLabelType_Return, off);
1607 AssertReturn(idxReturn != UINT32_MAX, UINT32_MAX);
1608
1609 /*
1610 * Restore registers and return.
1611 */
1612#ifdef RT_ARCH_AMD64
1613 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
1614 AssertReturn(pbCodeBuf, UINT32_MAX);
1615
1616 /* Reposition esp at the r15 restore point. */
1617 pbCodeBuf[off++] = X86_OP_REX_W;
1618 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
1619 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
1620 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
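    /* IEMNATIVE_FP_OFF_LAST_PUSH is the (negative) rbp-relative offset of the
       r15 save slot, so rsp now points at the last pushed register and the
       pops below unwind the prolog pushes in reverse order. */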
1621
1622 /* Pop non-volatile registers and return */
1623 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
1624 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
1625 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
1626 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
1627 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
1628 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
1629 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
1630 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
1631# ifdef RT_OS_WINDOWS
1632 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
1633 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
1634# endif
1635 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
1636 pbCodeBuf[off++] = 0xc9; /* leave */
1637 pbCodeBuf[off++] = 0xc3; /* ret */
1638 pbCodeBuf[off++] = 0xcc; /* int3 poison */
1639
1640#elif RT_ARCH_ARM64
1641 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1642 AssertReturn(pu32CodeBuf, UINT32_MAX);
1643
1644 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
1645 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
1646 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
1647 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
1648 IEMNATIVE_FRAME_VAR_SIZE / 8);
1649 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
1650 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1651 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
1652 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1653 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
1654 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1655 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
1656 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1657 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
1658 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1659 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
1660 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
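    /* This mirrors the stp sequence in iemNativeEmitProlog below, restoring the
       same register pairs from the same frame offsets with ldp instead. */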
1661
1662 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
1663 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
1664 pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE);
1665
1666 /* ret */
1667 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
1668
1669#else
1670# error "port me"
1671#endif
1672
1673 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
1674}
1675
1676
1677/**
1678 * Emits a standard prolog.
1679 */
1680static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /*
1684 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
1685 * reserving 64 bytes for stack variables plus 4 non-register argument
1686 * slots. Fixed register assignment: xBX = pVCpu.
1687 *
1688 * Since we always do the same register spilling, we can use the same
1689 * unwind description for all the code.
1690 */
1691 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1692 AssertReturn(pbCodeBuf, UINT32_MAX);
1693 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
1694 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
1695 pbCodeBuf[off++] = 0x8b;
1696 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
1697 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
1698 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
1699# ifdef RT_OS_WINDOWS
1700 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
1701 pbCodeBuf[off++] = 0x8b;
1702 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
1703 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
1704 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
1705# else
1706 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
1707 pbCodeBuf[off++] = 0x8b;
1708 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
1709# endif
1710 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
1711 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
1712 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
1713 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
1714 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
1715 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
1716 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
1717 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
1718
1719 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
1720 X86_GREG_xSP,
1721 IEMNATIVE_FRAME_ALIGN_SIZE
1722 + IEMNATIVE_FRAME_VAR_SIZE
1723 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
1724 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
1725 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
1726 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
1727 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
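    /*
     * Rough frame layout after the above (offsets relative to rbp; the exact
     * set of pushes differs between windows and linux/darwin as seen above):
     *      [rbp+8]   return address
     *      [rbp]     saved rbp
     *      [rbp-8]   saved rbx               (IEMNATIVE_REG_FIXED_PVMCPU)
     *      ...       saved rsi/rdi (windows only), r12 thru r15
     *      below     shadow/stack argument slots and the variable area
     *                reserved by the 'sub rsp' just emitted.
     */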
1728
1729#elif RT_ARCH_ARM64
1730 /*
1731 * We set up a stack frame exactly like on AMD64, only we have to push the
1732 * return address ourselves here. We save all non-volatile registers.
1733 */
1734 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1735 AssertReturn(pu32CodeBuf, UINT32_MAX);
1736
1737 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
1738 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
1739 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
1740 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
1741 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
1742 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
1743 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1744 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
1745 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1746 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
1747 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1748 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
1749 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1750 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
1751 /* Save the BP and LR (ret address) registers at the top of the frame. */
1752 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
1753 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
1754 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
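    /* That is 12 slots of 8 bytes: x19 thru x28 (10 registers) plus BP and LR,
       matching the IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12 assertion above. */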
1755 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
1756 pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(false /*fSub*/, ARMV8_A64_REG_BP,
1757 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
1758
1759 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
1760 pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
1761
1762 /* mov r28, r0 */
1763 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
1764
1765#else
1766# error "port me"
1767#endif
1768 return off;
1769}
1770
1771
1772/**
1773 * Recompiles the given threaded TB into a native one.
1774 *
1775 * In case of failure the translation block will be returned as-is.
1776 *
1777 * @returns pTb.
1778 * @param pVCpu The cross context virtual CPU structure of the calling
1779 * thread.
1780 * @param pTb The threaded translation block to recompile to native.
1781 */
1782PIEMTB iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
1783{
1784 /*
1785 * The first time thru, we allocate the recompiler state, the other times
1786 * we just need to reset it before using it again.
1787 */
1788 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
1789 if (RT_LIKELY(pReNative))
1790 iemNativeReInit(pReNative);
1791 else
1792 {
1793 pReNative = iemNativeInit(pVCpu);
1794 AssertReturn(pReNative, pTb);
1795 }
1796
1797 /*
1798 * Emit prolog code (fixed).
1799 */
1800 uint32_t off = iemNativeEmitProlog(pReNative, 0);
1801 AssertReturn(off != UINT32_MAX, pTb);
1802
1803 /*
1804 * Convert the calls to native code.
1805 */
1806 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
1807 uint32_t cCallsLeft = pTb->Thrd.cCalls;
1808 while (cCallsLeft-- > 0)
1809 {
1810 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
1811 AssertReturn(off != UINT32_MAX, pTb);
1812
1813 pCallEntry++;
1814 }
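    /* Each threaded call entry has now been expanded into the load-arguments,
       call and status-check sequence emitted by iemNativeEmitThreadedCall. */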
1815
1816 /*
1817 * Emit the epilog code.
1818 */
1819 off = iemNativeEmitEpilog(pReNative, off);
1820 AssertReturn(off != UINT32_MAX, pTb);
1821
1822 /*
1823 * Make sure all labels have been defined.
1824 */
1825 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
1826#ifdef VBOX_STRICT
1827 uint32_t const cLabels = pReNative->cLabels;
1828 for (uint32_t i = 0; i < cLabels; i++)
1829 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
1830#endif
1831
1832 /*
1833 * Allocate executable memory, copy over the code we've generated.
1834 */
1835 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1836 if (pTbAllocator->pDelayedFreeHead)
1837 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
1838
1839 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
1840 AssertReturn(paFinalInstrBuf, pTb);
1841 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
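    /* Note: 'off' counts IEMNATIVEINSTR units - bytes on AMD64 and 32-bit
       instruction words on ARM64, judging by the uint8_t/uint32_t code buffers
       used by the emitters above. */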
1842
1843 /*
1844 * Apply fixups.
1845 */
1846 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
1847 uint32_t const cFixups = pReNative->cFixups;
1848 for (uint32_t i = 0; i < cFixups; i++)
1849 {
1850 Assert(paFixups[i].off < off);
1851 Assert(paFixups[i].idxLabel < cLabels);
1852 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
1853 switch (paFixups[i].enmType)
1854 {
1855#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1856 case kIemNativeFixupType_Rel32:
1857 Assert(paFixups[i].off + 4 <= off);
1858 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
1859 continue;
1860
1861#elif defined(RT_ARCH_ARM64)
1862 case kIemNativeFixupType_RelImm19At5:
1863 {
1864 Assert(paFixups[i].off < off);
1865 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
1866 Assert(offDisp >= -262144 && offDisp < 262144);
1867 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (offDisp << 5);
1868 continue;
1869 }
1870#endif
1871 case kIemNativeFixupType_Invalid:
1872 case kIemNativeFixupType_End:
1873 break;
1874 }
1875 AssertFailed();
1876 }
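    /* The Rel32 fixup writes the label-relative displacement into the 32-bit
       field at the fixup offset, with offAddend presumably compensating for
       where that field sits relative to the end of the instruction; the ARM64
       RelImm19At5 fixup patches the imm19 field (bits 5..23) of encodings such
       as conditional branches and CBZ/CBNZ. */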
1877
1878 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
1879
1880 /*
1881 * Convert the translation block.
1882 */
1883 //RT_BREAKPOINT();
1884 RTMemFree(pTb->Thrd.paCalls);
1885 pTb->Native.paInstructions = paFinalInstrBuf;
1886 pTb->Native.cInstructions = off;
1887 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
1888
1889 Assert(pTbAllocator->cThreadedTbs > 0);
1890 pTbAllocator->cThreadedTbs -= 1;
1891 pTbAllocator->cNativeTbs += 1;
1892 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
1893
1894 return pTb;
1895}
1896