VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 101163

Last change on this file since 101163 was 101163, checked in by vboxsync, 16 months ago

VMM/IEM: Experimental recompilation of threaded blocks into native code on linux.amd64. bugref:10370

1/* $Id: IEMAllN8veRecompiler.cpp 101163 2023-09-18 20:44:24Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : ...
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): ...
18 * - Level 12 (Log12): ...
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/heap.h>
59#include <iprt/mem.h>
60#include <iprt/string.h>
61#ifdef RT_OS_WINDOWS
62/** @todo */
63#else
64# include <iprt/formats/dwarf.h>
65extern "C" void __register_frame_info(void *begin, void *ob);
66#endif
67
68#include "IEMInline.h"
69#include "IEMThreadedFunctions.h"
70
71
72/*
73 * Narrow down configs here to avoid wasting time on unused configs.
74 * Note! Same checks in IEMAllThrdRecompiler.cpp.
75 */
76
77#ifndef IEM_WITH_CODE_TLB
78# error The code TLB must be enabled for the recompiler.
79#endif
80
81#ifndef IEM_WITH_DATA_TLB
82# error The data TLB must be enabled for the recompiler.
83#endif
84
85#ifndef IEM_WITH_SETJMP
86# error The setjmp approach must be enabled for the recompiler.
87#endif
88
89
90
91/*********************************************************************************************************************************
92* Executable Memory Allocator *
93*********************************************************************************************************************************/
94
95#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
96/**
97 * Per-chunk unwind info for non-windows hosts.
98 */
99typedef struct IEMEXECMEMCHUNKEHFRAME
100{
101 /** Storage for the 'struct object' record that __register_frame_info() requires for libgcc's bookkeeping. */
102 uint8_t abObject[1024];
103 /** The dwarf ehframe data for the chunk. */
104 uint8_t abEhFrame[512];
105} IEMEXECMEMCHUNKEHFRAME;
106/** Pointer to per-chunk unwind info for non-windows hosts. */
107typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
108#endif
109
110
111/**
112 * A chunk of executable memory.
113 */
114typedef struct IEMEXECMEMCHUNK
115{
116 /** The heap handle. */
117 RTHEAPSIMPLE hHeap;
118 /** Pointer to the chunk. */
119 void *pvChunk;
120#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
121 /** Exception handling frame information for proper unwinding during C++
122 * throws and (possibly) longjmp(). */
123 PIEMEXECMEMCHUNKEHFRAME pEhFrame;
124#endif
125#ifdef IN_RING0
126 /** Allocation handle. */
127 RTR0MEMOBJ hMemObj;
128#endif
129} IEMEXECMEMCHUNK;
130/** Pointer to a memory chunk. */
131typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
132
133
134/**
135 * Executable memory allocator for the native recompiler.
136 */
137typedef struct IEMEXECMEMALLOCATOR
138{
139 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
140 uint32_t uMagic;
141
142 /** The chunk size. */
143 uint32_t cbChunk;
144 /** The maximum number of chunks. */
145 uint32_t cMaxChunks;
146 /** The current number of chunks. */
147 uint32_t cChunks;
148 /** Hint where to start looking for available memory. */
149 uint32_t idxChunkHint;
150 /** Statistics: Current number of allocations. */
151 uint32_t cAllocations;
152
153 /** The total amount of memory available. */
154 uint64_t cbTotal;
155 /** Total amount of free memory. */
156 uint64_t cbFree;
157 /** Total amount of memory allocated. */
158 uint64_t cbAllocated;
159
160 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
161 * @{ */
162 /** The size of the heap internal block header. This is used to adjust the
163 * requested memory size to make sure there is exactly enough room for a header at
164 * the end of the blocks we allocate before the next 64 byte alignment line. */
165 uint32_t cbHeapBlockHdr;
166 /** The size of the initial heap allocation required to make sure the first
167 * allocation is correctly aligned. */
168 uint32_t cbHeapAlignTweak;
169 /** The alignment tweak allocation address. */
170 void *pvAlignTweak;
171 /** @} */
172
173#if defined(IN_RING3) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
174 PRUNTIME_FUNCTION paUnwindFunctions;
175#endif
176
177 /** The allocation chunks. */
178 RT_FLEXIBLE_ARRAY_EXTENSION
179 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
180} IEMEXECMEMALLOCATOR;
181/** Pointer to an executable memory allocator. */
182typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
183
184/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
185#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
186
187
188#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
189
190/**
191 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
192 */
193DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
194{
195 if (iValue >= 64)
196 {
197 Assert(iValue < 0x2000);
198 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
199 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
200 }
201 else if (iValue >= 0)
202 *Ptr.pb++ = (uint8_t)iValue;
203 else if (iValue > -64)
204 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
205 else
206 {
207 Assert(iValue > -0x2000);
208 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
209 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
210 }
211 return Ptr;
212}
213
214
215/**
216 * Emits an ULEB128 encoded value (up to 64-bit wide).
217 */
218DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
219{
220 while (uValue >= 0x80)
221 {
222 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
223 uValue >>= 7;
224 }
225 *Ptr.pb++ = (uint8_t)uValue;
226 return Ptr;
227}
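/* Example encodings produced by the two helpers above (standard DWARF LEB128):
 *      iemDwarfPutLeb128(Ptr, -8)    ->  0x78         (sign bit 0x40 set)
 *      iemDwarfPutLeb128(Ptr, 300)   ->  0xac 0x02    (continuation bit 0x80 on the first byte)
 *      iemDwarfPutUleb128(Ptr, 0x80) ->  0x80 0x01
 */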
228
229
230/**
231 * Emits a CFA rule as register @a uReg + offset @a off.
232 */
233DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
234{
235 *Ptr.pb++ = DW_CFA_def_cfa;
236 Ptr = iemDwarfPutUleb128(Ptr, uReg);
237 Ptr = iemDwarfPutUleb128(Ptr, off);
238 return Ptr;
239}
240
241
242/**
243 * Emits a register (@a uReg) save location:
244 * CFA + @a off * data_alignment_factor
245 */
246DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
247{
248 if (uReg < 0x40)
249 *Ptr.pb++ = DW_CFA_offset | uReg;
250 else
251 {
252 *Ptr.pb++ = DW_CFA_offset_extended;
253 Ptr = iemDwarfPutUleb128(Ptr, uReg);
254 }
255 Ptr = iemDwarfPutUleb128(Ptr, off);
256 return Ptr;
257}
258
259
260/**
261 * Emits a register (@a uReg) save location, using signed offset:
262 * CFA + @a offSigned * data_alignment_factor
263 */
264DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
265{
266 *Ptr.pb++ = DW_CFA_offset_extended_sf;
267 Ptr = iemDwarfPutUleb128(Ptr, uReg);
268 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
269 return Ptr;
270}
271
272
273/**
274 * Initializes the unwind info section for non-windows hosts.
275 */
276static void iemExecMemAllocatorInitEhFrameForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator,
277 PIEMEXECMEMCHUNKEHFRAME pEhFrame, void *pvChunk)
278{
279 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
280
281 /*
282 * Generate the CIE first.
283 */
284 RTPTRUNION const PtrCie = Ptr;
285 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
286 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
287 *Ptr.pb++ = 4; /* DWARF v4. */
288 *Ptr.pb++ = 0; /* Augmentation. */
289 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
290 *Ptr.pb++ = 0; /* Segment selector size. */
291 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
292 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
293 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
294 /* Initial instructions: */
295 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
296 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
297 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
298 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
299 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
300 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
301 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
302 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
303 while ((Ptr.u - PtrCie.u) & 3)
304 *Ptr.pb++ = DW_CFA_nop;
305 /* Finalize the CIE size. */
306 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
307
308 /*
309 * Generate an FDE for the whole chunk area.
310 */
311 RTPTRUNION const PtrFde = Ptr;
312 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
313 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
314 Ptr.pu32++;
315 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
316 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
317 //*Ptr.pb++ = DW_CFA_nop; - not required for recent libgcc/glibc.
318 while ((Ptr.u - PtrFde.u) & 3)
319 *Ptr.pb++ = DW_CFA_nop;
320 /* Finalize the FDE size. */
321 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
322
323 /* Terminator entry. */
324 *Ptr.pu32++ = 0;
325 *Ptr.pu32++ = 0; /* just to be sure... */
326 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
327}
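/* Layout of the generated abEhFrame data, in .eh_frame terms:
 *      CIE:        length, CIE id (0), version, augmentation, address & segment
 *                  sizes, code/data alignment factors, return address column,
 *                  initial CFI instructions, NOP padding to a 4 byte boundary.
 *      FDE:        length, back-pointer to the CIE, absolute chunk address,
 *                  chunk size, NOP padding.
 *      Terminator: a zero length entry.
 * The caller hands abEhFrame to __register_frame_info() so libgcc can unwind
 * through code placed in the chunk.
 */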
328
329#endif /* IN_RING3 && !RT_OS_WINDOWS */
330
331
332/**
333 * Adds another chunk to the executable memory allocator.
334 *
335 * This is used by the init code for the initial allocation and later by the
336 * regular allocator function when it's out of memory.
337 */
338static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator)
339{
340 /* Check that we've room for growth. */
341 uint32_t const idxChunk = pExecMemAllocator->cChunks;
342 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
343
344 /* Allocate a chunk. */
345 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
346 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
347
348 /* Initialize the heap for the chunk. */
349 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
350 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
351 AssertRC(rc);
352 if (RT_SUCCESS(rc))
353 {
354 /*
355 * We want the memory to be aligned on a 64 byte boundary, so the first time thru
356 * here we do some exploratory allocations to see how we can achieve this.
357 * On subsequent runs we only make an initial adjustment allocation, if
358 * necessary.
359 *
360 * Since we own the heap implementation, we know that the internal block
361 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
362 * so all we need to do wrt allocation size adjustments is to add 32 bytes
363 * to the size, align up by 64 bytes, and subtract 32 bytes.
364 *
365 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
366 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
367 * allocation to force subsequent allocations to return 64 byte aligned
368 * user areas.
369 */
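 /* Worked example: a 100 byte request is adjusted to
  * RT_ALIGN_32(100 + 32, 64) - 32 = 192 - 32 = 160 bytes, so that
  * header (32) + user area (160) = 192, a multiple of 64, keeping the
  * next allocation's user area 64 byte aligned as well. */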
370 if (!pExecMemAllocator->cbHeapBlockHdr)
371 {
372 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
373 pExecMemAllocator->cbHeapAlignTweak = 64;
374 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
375 32 /*cbAlignment*/);
376 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
377
378 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
379 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
380 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
381 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
382 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
383
384 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
385 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
386 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
387 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
388 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
389
390 RTHeapSimpleFree(hHeap, pvTest2);
391 RTHeapSimpleFree(hHeap, pvTest1);
392 }
393 else
394 {
395 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
396 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
397 }
398 if (RT_SUCCESS(rc))
399 {
400#ifdef IN_RING3
401# ifdef RT_OS_WINDOWS
402 /*
403 * Register the runtime function table for this chunk.
404 * We can share the data structure on windows.
405 */
406 /** @todo */
407# else
408 /*
409 * Generate an .eh_frame section for the chunk and register it so
410 * the unwinding code works (required for C++ exceptions and
411 * probably also for longjmp()).
412 */
413 PIEMEXECMEMCHUNKEHFRAME pEhFrame = (PIEMEXECMEMCHUNKEHFRAME)RTMemAllocZ(sizeof(IEMEXECMEMCHUNKEHFRAME));
414 if (pEhFrame)
415 {
416 iemExecMemAllocatorInitEhFrameForChunk(pExecMemAllocator, pEhFrame, pvChunk);
417 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
418 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
419 }
420 else
421 rc = VERR_NO_MEMORY;
422# endif
423 if (RT_SUCCESS(rc))
424#endif
425 {
426 /*
427 * Finalize the adding of the chunk.
428 */
429 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
430 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
431#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
432 pExecMemAllocator->aChunks[idxChunk].pEhFrame = pEhFrame;
433#endif
434
435 pExecMemAllocator->cChunks = idxChunk + 1;
436 pExecMemAllocator->idxChunkHint = idxChunk;
437
438 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
439 pExecMemAllocator->cbTotal += cbFree;
440 pExecMemAllocator->cbFree += cbFree;
441
442 return VINF_SUCCESS;
443 }
444 }
445 }
446 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
447 return rc;
448}
449
450
451/**
452 * Initializes the executable memory allocator for native recompilation on the
453 * calling EMT.
454 *
455 * @returns VBox status code.
456 * @param pVCpu The cross context virtual CPU structure of the calling
457 * thread.
458 * @param cbMax The max size of the allocator.
459 * @param cbInitial The initial allocator size.
460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
461 * dependent).
462 */
463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
464{
465 /*
466 * Validate input.
467 */
468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
470 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
471 || cbChunk == 0
472 || ( RT_IS_POWER_OF_TWO(cbChunk)
473 && cbChunk >= _1M
474 && cbChunk <= _256M
475 && cbChunk <= cbMax),
476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
477 VERR_OUT_OF_RANGE);
478
479 /*
480 * Adjust/figure out the chunk size.
481 */
482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
483 {
484 if (cbMax >= _256M)
485 cbChunk = _64M;
486 else
487 {
488 if (cbMax < _16M)
489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
490 else
491 cbChunk = (uint32_t)cbMax / 4;
492 if (!RT_IS_POWER_OF_TWO(cbChunk))
493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
494 }
495 }
496
497 if (cbChunk > cbMax)
498 cbMax = cbChunk;
499 else
500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
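 /* Example: cbMax = 128M with cbChunk left at 0 gives cbChunk = 128M / 4 = 32M
  * (already a power of two), cbMax stays 128M and cMaxChunks = 4. */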
503
504 /*
505 * Allocate and initialize the allocator instance.
506 */
507#if defined(IN_RING3) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
508 size_t const cbExtra = sizeof(RUNTIME_FUNCTION) + 0; /** @todo */
509#else
510 size_t const cbExtra = 0;
511#endif
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ( RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR,
513 aChunks[cMaxChunks])
514 + cbExtra);
515 AssertReturn(pExecMemAllocator, VERR_NO_MEMORY);
516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
517 pExecMemAllocator->cbChunk = cbChunk;
518 pExecMemAllocator->cMaxChunks = cMaxChunks;
519 pExecMemAllocator->cChunks = 0;
520 pExecMemAllocator->idxChunkHint = 0;
521 pExecMemAllocator->cAllocations = 0;
522 pExecMemAllocator->cbTotal = 0;
523 pExecMemAllocator->cbFree = 0;
524 pExecMemAllocator->cbAllocated = 0;
525#if defined(IN_RING3) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
526 pExecMemAllocator->paUnwindFunctions = (PRUNTIME_FUNCTION)&pExecMemAllocator->aChunks[cMaxChunks];
527 /** @todo */
528#endif
529 for (uint32_t i = 0; i < cMaxChunks; i++)
530 {
531 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
532 pExecMemAllocator->aChunks[i].pvChunk = NULL;
533#ifdef IN_RING0
534 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
535#elif !defined(RT_OS_WINDOWS)
536 pExecMemAllocator->aChunks[i].pEhFrame = NULL;
537#endif
538 }
539 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
540
541 /*
542 * Do the initial allocations.
543 */
544 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
545 {
546 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
547 AssertLogRelRCReturn(rc, rc);
548 }
549
550 pExecMemAllocator->idxChunkHint = 0;
551
552 return VINF_SUCCESS;
553}
554
555/**
556 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
557 * the heap statistics.
558 */
559DECL_FORCE_INLINE(void *) iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
560 uint32_t cbReq, uint32_t idxChunk)
561{
562 pExecMemAllocator->cAllocations += 1;
563 pExecMemAllocator->cbAllocated += cbReq;
564 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
565 pExecMemAllocator->idxChunkHint = idxChunk;
566 return pvRet;
567}
568
569
570/**
571 * Allocates @a cbReq bytes of executable memory.
572 *
573 * @returns Pointer to the memory, NULL if out of memory or other problem
574 * encountered.
575 * @param pVCpu The cross context virtual CPU structure of the calling
576 * thread.
577 * @param cbReq How many bytes are required.
578 */
579static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
580{
581 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
582 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
583 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
584
585 /*
586 * Adjust the request size as per the logic described in
587 * iemExecMemAllocatorGrow and attempt to allocate it from one of the
588 * existing chunks if we think we've got sufficient free memory around.
589 */
590 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
591 if (cbReq <= pExecMemAllocator->cbFree)
592 {
593 uint32_t const cChunks = pExecMemAllocator->cChunks;
594 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
595 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
596 {
597 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
598 if (pvRet)
599 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
600 }
601 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
602 {
603 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
604 if (pvRet)
605 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
606 }
607 }
608
609 /*
610 * Can we grow it with another chunk?
611 */
612 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
613 {
614 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
615 AssertLogRelRCReturn(rc, NULL);
616
617 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
618 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
619 if (pvRet)
620 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
621 AssertFailed();
622 }
623
624 /* What now? Prune native translation blocks from the cache? */
625 AssertFailed();
626 return NULL;
627}
628
629
630/** This is a hook that we may need later for changing memory protection back
631 * to readonly+exec */
632static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
633{
634 RT_NOREF(pVCpu, pv, cb);
635}
636
637
638/**
639 * Frees executable memory.
640 */
641void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
642{
643 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
644 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
645 Assert(pv);
646
647 /* Align the size as we did when allocating the block. */
648 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
649
650 /* Assert sanity if strict build. */
651#ifdef VBOX_STRICT
652 uint32_t const cChunks = pExecMemAllocator->cChunks;
653 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
654 bool fFound = false;
655 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
656 {
657 fFound = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk < cbChunk;
658 if (fFound)
659 {
660 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
661 break;
662 }
663 }
664 Assert(fFound);
665#endif
666
667 /* Update stats while cb is freshly calculated.*/
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Do the actual freeing. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674}
675
676
677/*********************************************************************************************************************************
678* Native Recompilation *
679*********************************************************************************************************************************/
680
681/** Native code generator label types. */
682typedef enum
683{
684 kIemNativeLabelType_Invalid = 0,
685 kIemNativeLabelType_Return,
686 kIemNativeLabelType_NonZeroRetOrPassUp,
687 kIemNativeLabelType_End
688} IEMNATIVELABELTYPE;
689
690/** Native code generator label definition. */
691typedef struct IEMNATIVELABEL
692{
693 /** Code offset if defined, UINT32_MAX if it needs to be generated after/in
694 * the epilog. */
695 uint32_t off;
696 /** The type of label (IEMNATIVELABELTYPE). */
697 uint16_t enmType;
698 /** Additional label data, type specific. */
699 uint16_t uData;
700} IEMNATIVELABEL;
701/** Pointer to a label. */
702typedef IEMNATIVELABEL *PIEMNATIVELABEL;
703
704
705/** Native code generator fixup types. */
706typedef enum
707{
708 kIemNativeFixupType_Invalid = 0,
709#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
710 /** AMD64 fixup: PC relative 32-bit with addend in offAddend. */
711 kIemNativeFixupType_Rel32,
712#elif defined(RT_ARCH_ARM64)
713#endif
714 kIemNativeFixupType_End
715} IEMNATIVEFIXUPTYPE;
716
717/** Native code generator fixup. */
718typedef struct IEMNATIVEFIXUP
719{
720 /** Code offset of the fixup location. */
721 uint32_t off;
722 /** The IEMNATIVELABEL this is a fixup for. */
723 uint16_t idxLabel;
724 /** The fixup type (IEMNATIVEFIXUPTYPE). */
725 uint8_t enmType;
726 /** Addend or other data. */
727 int8_t offAddend;
728} IEMNATIVEFIXUP;
729
730typedef IEMNATIVEFIXUP *PIEMNATIVEFIXUP;
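/** @note Rel32 fixups are resolved in iemNativeRecompile() by storing
 *        paLabels[idxLabel].off - off + offAddend as a 32-bit displacement at
 *        the fixup location; offAddend is -4 for AMD64 rel32 branches so the
 *        displacement ends up relative to the end of the instruction. */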
731
732
733/**
734 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
735 */
736IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
737{
738 pVCpu->iem.s.cInstructions += idxInstr;
739 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
740}
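/* Note: VINF_IEM_REEXEC_BREAK only means "stop executing this TB", so it is
 * folded into VINF_SUCCESS before the regular status code fiddling above. */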
741
742
743static void iemNativeReInit(PVMCPUCC pVCpu)
744{
745 pVCpu->iem.s.Native.cLabels = 0;
746 pVCpu->iem.s.Native.cFixups = 0;
747}
748
749
750static bool iemNativeInit(PVMCPUCC pVCpu)
751{
752 /*
753 * Try allocate all the buffers and stuff we need.
754 */
755 pVCpu->iem.s.Native.pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
756 pVCpu->iem.s.Native.paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
757 pVCpu->iem.s.Native.paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
758 if (RT_LIKELY( pVCpu->iem.s.Native.pInstrBuf
759 && pVCpu->iem.s.Native.paLabels
760 && pVCpu->iem.s.Native.paFixups))
761 {
762 /*
763 * Set the buffer & array sizes on success.
764 */
765 pVCpu->iem.s.Native.cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
766 pVCpu->iem.s.Native.cLabelsAlloc = _8K;
767 pVCpu->iem.s.Native.cFixupsAlloc = _16K;
768 iemNativeReInit(pVCpu);
769 return true;
770 }
771
772 /*
773 * Failed. Clean up and reset the state.
774 */
775 AssertFailed();
776 RTMemFree(pVCpu->iem.s.Native.pInstrBuf);
777 RTMemFree(pVCpu->iem.s.Native.paLabels);
778 RTMemFree(pVCpu->iem.s.Native.paFixups);
779 pVCpu->iem.s.Native.pInstrBuf = NULL;
780 pVCpu->iem.s.Native.paLabels = NULL;
781 pVCpu->iem.s.Native.paFixups = NULL;
782 return false;
783}
784
785
786static uint32_t iemNativeMakeLabel(PVMCPUCC pVCpu, IEMNATIVELABELTYPE enmType,
787 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0)
788{
789 /*
790 * Do we have the label already?
791 */
792 PIEMNATIVELABEL paLabels = pVCpu->iem.s.Native.paLabels;
793 uint32_t const cLabels = pVCpu->iem.s.Native.cLabels;
794 for (uint32_t i = 0; i < cLabels; i++)
795 if ( paLabels[i].enmType == enmType
796 && paLabels[i].uData == uData)
797 {
798 if (paLabels[i].off == offWhere || offWhere == UINT32_MAX)
799 return i;
800 if (paLabels[i].off == UINT32_MAX)
801 {
802 paLabels[i].off = offWhere;
803 return i;
804 }
805 }
806
807 /*
808 * Make sure we've got room for another label.
809 */
810 if (RT_LIKELY(cLabels < pVCpu->iem.s.Native.cLabelsAlloc))
811 { /* likely */ }
812 else
813 {
814 uint32_t cNew = pVCpu->iem.s.Native.cLabelsAlloc;
815 AssertReturn(cNew, UINT32_MAX);
816 AssertReturn(cLabels == cNew, UINT32_MAX);
817 cNew *= 2;
818 AssertReturn(cNew <= _64K, UINT32_MAX); /* IEMNATIVEFIXUP::idxLabel type restricts this */
819 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
820 AssertReturn(paLabels, UINT32_MAX);
821 pVCpu->iem.s.Native.paLabels = paLabels;
822 pVCpu->iem.s.Native.cLabelsAlloc = cNew;
823 }
824
825 /*
826 * Define a new label.
827 */
828 paLabels[cLabels].off = offWhere;
829 paLabels[cLabels].enmType = enmType;
830 paLabels[cLabels].uData = uData;
831 pVCpu->iem.s.Native.cLabels = cLabels + 1;
832 return cLabels;
833}
834
835
836static uint32_t iemNativeFindLabel(PVMCPUCC pVCpu, IEMNATIVELABELTYPE enmType,
837 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0)
838{
839 PIEMNATIVELABEL paLabels = pVCpu->iem.s.Native.paLabels;
840 uint32_t const cLabels = pVCpu->iem.s.Native.cLabels;
841 for (uint32_t i = 0; i < cLabels; i++)
842 if ( paLabels[i].enmType == enmType
843 && paLabels[i].uData == uData
844 && ( paLabels[i].off == offWhere
845 || offWhere == UINT32_MAX
846 || paLabels[i].off == UINT32_MAX))
847 return i;
848 return UINT32_MAX;
849}
850
851
852
853static bool iemNativeAddFixup(PVMCPUCC pVCpu, uint32_t offWhere, uint32_t idxLabel,
854 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend = 0)
855{
856 Assert(idxLabel <= UINT16_MAX);
857 Assert((unsigned)enmType <= UINT8_MAX);
858
859 /*
860 * Make sure we've room.
861 */
862 PIEMNATIVEFIXUP paFixups = pVCpu->iem.s.Native.paFixups;
863 uint32_t const cFixups = pVCpu->iem.s.Native.cFixups;
864 if (RT_LIKELY(cFixups < pVCpu->iem.s.Native.cFixupsAlloc))
865 { /* likely */ }
866 else
867 {
868 uint32_t cNew = pVCpu->iem.s.Native.cFixupsAlloc;
869 AssertReturn(cNew, false);
870 AssertReturn(cFixups == cNew, false);
871 cNew *= 2;
872 AssertReturn(cNew <= _128K, false);
873 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
874 AssertReturn(paFixups, false);
875 pVCpu->iem.s.Native.paFixups = paFixups;
876 pVCpu->iem.s.Native.cFixupsAlloc = cNew;
877 }
878
879 /*
880 * Add the fixup.
881 */
882 paFixups[cFixups].off = offWhere;
883 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
884 paFixups[cFixups].enmType = enmType;
885 paFixups[cFixups].offAddend = offAddend;
886 pVCpu->iem.s.Native.cFixups = cFixups + 1;
887 return true;
888}
889
890
891static PIEMNATIVEINSTR iemNativeInstrBufEnsureSlow(PVMCPUCC pVCpu, uint32_t off, uint32_t cInstrReq)
892{
893 /* Double the buffer size till we meet the request. */
894 uint32_t cNew = pVCpu->iem.s.Native.cInstrBufAlloc;
895 AssertReturn(cNew > 0, NULL);
896 do
897 cNew *= 2;
898 while (cNew < off + cInstrReq);
899
900 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
901 AssertReturn(cbNew <= _2M, NULL);
902
903 void *pvNew = RTMemRealloc(pVCpu->iem.s.Native.pInstrBuf, cbNew);
904 AssertReturn(pvNew, NULL);
905
906 pVCpu->iem.s.Native.cInstrBufAlloc = cNew;
907 return pVCpu->iem.s.Native.pInstrBuf = (PIEMNATIVEINSTR)pvNew;
908}
909
910
911DECL_FORCE_INLINE(PIEMNATIVEINSTR) iemNativeInstrBufEnsure(PVMCPUCC pVCpu, uint32_t off, uint32_t cInstrReq)
912{
913 if (RT_LIKELY(off + cInstrReq <= pVCpu->iem.s.Native.cInstrBufAlloc))
914 return pVCpu->iem.s.Native.pInstrBuf;
915 return iemNativeInstrBufEnsureSlow(pVCpu, off, cInstrReq);
916}
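/* Typical usage pattern in the emitters below:
 *      uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, cInstrReq);
 *      AssertReturn(pbCodeBuf, UINT32_MAX);
 *      pbCodeBuf[off++] = ...;     (at most cInstrReq instruction units)
 *      return off;
 */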
917
918
919/**
920 * Emit a simple marker instruction to more easily tell where something starts
921 * in the disassembly.
922 */
923uint32_t iemNativeEmitMarker(PVMCPUCC pVCpu, uint32_t off)
924{
925#ifdef RT_ARCH_AMD64
926 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 1);
927 AssertReturn(pbCodeBuf, UINT32_MAX);
928 pbCodeBuf[off++] = 0x90; /* nop */
929
930#elif RT_ARCH_ARM64
931 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 1);
932 pu32CodeBuf[off++] = 0xd503201f; /* nop */
933
934#else
935# error "port me"
936#endif
937 return off;
938}
939
940
941static uint32_t iemNativeEmitGprZero(PVMCPUCC pVCpu, uint32_t off, uint8_t iGpr)
942{
943#ifdef RT_ARCH_AMD64
944 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 3);
945 AssertReturn(pbCodeBuf, UINT32_MAX);
946 if (iGpr >= 8) /* xor gpr32, gpr32 */
947 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
948 pbCodeBuf[off++] = 0x33;
949 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
950
951#elif RT_ARCH_ARM64
952 RT_NOREF(pVCpu, iGpr);
953 off = UINT32_MAX;
954
955#else
956# error "port me"
957#endif
958 RT_NOREF(pVCpu);
959 return off;
960}
961
962
963static uint32_t iemNativeEmitLoadGprImm64(PVMCPUCC pVCpu, uint32_t off, uint8_t iGpr, uint64_t uImm64)
964{
965 if (!uImm64)
966 return iemNativeEmitGprZero(pVCpu, off, iGpr);
967
968#ifdef RT_ARCH_AMD64
969 if (uImm64 <= UINT32_MAX)
970 {
971 /* mov gpr, imm32 */
972 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 6);
973 AssertReturn(pbCodeBuf, UINT32_MAX);
974 if (iGpr >= 8)
975 pbCodeBuf[off++] = X86_OP_REX_B; /* B8+r encodes the register in the opcode byte, so REX.B */
976 pbCodeBuf[off++] = 0xb8 + (iGpr & 7);
977 pbCodeBuf[off++] = RT_BYTE1(uImm64);
978 pbCodeBuf[off++] = RT_BYTE2(uImm64);
979 pbCodeBuf[off++] = RT_BYTE3(uImm64);
980 pbCodeBuf[off++] = RT_BYTE4(uImm64);
981 }
982 else
983 {
984 /* mov gpr, imm64 */
985 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 10);
986 AssertReturn(pbCodeBuf, UINT32_MAX);
987 if (iGpr < 8)
988 pbCodeBuf[off++] = X86_OP_REX_W;
989 else
990 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
991 pbCodeBuf[off++] = 0xb8 + (iGpr & 7);
992 pbCodeBuf[off++] = RT_BYTE1(uImm64);
993 pbCodeBuf[off++] = RT_BYTE2(uImm64);
994 pbCodeBuf[off++] = RT_BYTE3(uImm64);
995 pbCodeBuf[off++] = RT_BYTE4(uImm64);
996 pbCodeBuf[off++] = RT_BYTE5(uImm64);
997 pbCodeBuf[off++] = RT_BYTE6(uImm64);
998 pbCodeBuf[off++] = RT_BYTE7(uImm64);
999 pbCodeBuf[off++] = RT_BYTE8(uImm64);
1000 }
1001
1002#elif RT_ARCH_ARM64
1003 RT_NOREF(pVCpu, iGpr, uImm64);
1004 off = UINT32_MAX;
1005
1006#else
1007# error "port me"
1008#endif
1009 return off;
1010}
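/* The AMD64 path above picks the shortest usable encoding:
 *      xor r32, r32                  for uImm64 == 0 (via iemNativeEmitGprZero),
 *      mov r32, imm32 (B8+r id)      for values fitting in 32 bits (implicitly
 *                                    zero extended to 64 bits),
 *      mov r64, imm64 (REX.W B8+r)   otherwise.
 */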
1011
1012
1013static uint32_t iemNativeEmitLoadGprFromVCpuU32(PVMCPUCC pVCpu, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
1014{
1015#ifdef RT_ARCH_AMD64
1016 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 7);
1017 AssertReturn(pbCodeBuf, UINT32_MAX);
1018
1019 /* mov reg32, mem32 */
1020 if (iGpr >= 8)
1021 pbCodeBuf[off++] = X86_OP_REX_R;
1022 pbCodeBuf[off++] = 0x8b;
1023 if (offVCpu < 128)
1024 {
1025 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGpr & 7, X86_GREG_xBX);
1026 pbCodeBuf[off++] = (uint8_t)offVCpu;
1027 }
1028 else
1029 {
1030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGpr & 7, X86_GREG_xBX);
1031 pbCodeBuf[off++] = RT_BYTE1(offVCpu);
1032 pbCodeBuf[off++] = RT_BYTE2(offVCpu);
1033 pbCodeBuf[off++] = RT_BYTE3(offVCpu);
1034 pbCodeBuf[off++] = RT_BYTE4(offVCpu);
1035 }
1036
1037#elif RT_ARCH_ARM64
1038 RT_NOREF(pVCpu, iGpr, offVCpu);
1039 off = UINT32_MAX;
1040
1041#else
1042# error "port me"
1043#endif
1044 return off;
1045}
1046
1047
1048static uint32_t iemNativeEmitLoadGprFromGpr(PVMCPUCC pVCpu, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1049{
1050#ifdef RT_ARCH_AMD64
1051 /* mov gprdst, gprsrc */
1052 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 3);
1053 AssertReturn(pbCodeBuf, UINT32_MAX);
1054 if ((iGprDst | iGprSrc) >= 8)
1055 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1056 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1057 : X86_OP_REX_W | X86_OP_REX_R;
1058 else
1059 pbCodeBuf[off++] = X86_OP_REX_W;
1060 pbCodeBuf[off++] = 0x8b;
1061 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1062
1063#elif RT_ARCH_ARM64
1064 RT_NOREF(pVCpu, iGprDst, iGprSrc);
1065 off = UINT32_MAX;
1066
1067#else
1068# error "port me"
1069#endif
1070 return off;
1071}
1072
1073
1074static uint32_t iemNativeEmitCheckCallRetAndPassUp(PVMCPUCC pVCpu, uint32_t off, uint8_t idxInstr)
1075{
1076#ifdef RT_ARCH_AMD64
1077 /* eax = call status code.*/
1078
1079 /* edx = rcPassUp */
1080 off = iemNativeEmitLoadGprFromVCpuU32(pVCpu, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
1081 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1082
1083 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 10);
1084 AssertReturn(pbCodeBuf, UINT32_MAX);
1085
1086 /* edx = eax | rcPassUp*/
1087 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
1088 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
1089
1090 /* Jump to non-zero status return path, loading cl with the instruction number. */
1091 pbCodeBuf[off++] = 0xb0 + X86_GREG_xCX; /* mov cl, imm8 (pCallEntry->idxInstr) */
1092 pbCodeBuf[off++] = idxInstr;
1093
1094 pbCodeBuf[off++] = 0x0f; /* jnz rel32 */
1095 pbCodeBuf[off++] = 0x85;
1096 uint32_t const idxLabel = iemNativeMakeLabel(pVCpu, kIemNativeLabelType_NonZeroRetOrPassUp);
1097 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
1098 AssertReturn(iemNativeAddFixup(pVCpu, off, idxLabel, kIemNativeFixupType_Rel32, -4), UINT32_MAX);
1099 pbCodeBuf[off++] = 0x00;
1100 pbCodeBuf[off++] = 0x00;
1101 pbCodeBuf[off++] = 0x00;
1102 pbCodeBuf[off++] = 0x00;
1103
1104 /* done. */
1105
1106#elif RT_ARCH_ARM64
1107 RT_NOREF(pVCpu, idxInstr);
1108 off = UINT32_MAX;
1109
1110#else
1111# error "port me"
1112#endif
1113 return off;
1114}
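/* Summary of the AMD64 sequence emitted above:
 *      mov     edx, [rbx + offsetof(VMCPU, iem.s.rcPassUp)]
 *      or      edx, eax                ; eax = status code returned by the call
 *      mov     cl, idxInstr
 *      jnz     NonZeroRetOrPassUp      ; rel32, fixed up against the label later
 */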
1115
1116
1117static uint32_t iemNativeEmitThreadedCall(PVMCPUCC pVCpu, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
1118{
1119#ifdef VBOX_STRICT
1120 off = iemNativeEmitMarker(pVCpu, off);
1121 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1122#endif
1123
1124#ifdef RT_ARCH_AMD64
1125 /* Load the parameters and emit the call. */
1126# ifdef RT_OS_WINDOWS
1127 off = iemNativeEmitLoadGprFromGpr(pVCpu, off, X86_GREG_xCX, X86_GREG_xBX);
1128 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1129 off = iemNativeEmitLoadGprImm64(pVCpu, off, X86_GREG_xDX, pCallEntry->auParams[0]);
1130 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1131 off = iemNativeEmitLoadGprImm64(pVCpu, off, X86_GREG_x8, pCallEntry->auParams[1]);
1132 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1133 off = iemNativeEmitLoadGprImm64(pVCpu, off, X86_GREG_x9, pCallEntry->auParams[2]);
1134 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1135# else
1136 off = iemNativeEmitLoadGprFromGpr(pVCpu, off, X86_GREG_xDI, X86_GREG_xBX);
1137 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1138 off = iemNativeEmitLoadGprImm64(pVCpu, off, X86_GREG_xSI, pCallEntry->auParams[0]);
1139 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1140 off = iemNativeEmitLoadGprImm64(pVCpu, off, X86_GREG_xDX, pCallEntry->auParams[1]);
1141 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1142 off = iemNativeEmitLoadGprImm64(pVCpu, off, X86_GREG_xCX, pCallEntry->auParams[2]);
1143 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1144# endif
1145 off = iemNativeEmitLoadGprImm64(pVCpu, off, X86_GREG_xAX, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
1146 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1147
1148 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 2);
1149 AssertReturn(pbCodeBuf, UINT32_MAX);
1150 pbCodeBuf[off++] = 0xff; /* call rax */
1151 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
1152
1153 /* Check the status code. */
1154 off = iemNativeEmitCheckCallRetAndPassUp(pVCpu, off, pCallEntry->idxInstr);
1155 AssertReturn(off != UINT32_MAX, off);
1156
1157
1158#elif RT_ARCH_ARM64
1159 RT_NOREF(pVCpu, pCallEntry);
1160 off = UINT32_MAX;
1161
1162#else
1163# error "port me"
1164#endif
1165 return off;
1166}
1167
1168
1169static uint32_t iemNativeEmitEpilog(PVMCPUCC pVCpu, uint32_t off)
1170{
1171#ifdef RT_ARCH_AMD64
1172 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 20);
1173 AssertReturn(pbCodeBuf, UINT32_MAX);
1174
1175 /*
1176 * Successful return, so clear eax.
1177 */
1178 pbCodeBuf[off++] = 0x33; /* xor eax, eax */
1179 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xAX, X86_GREG_xAX);
1180
1181 /*
1182 * Define label for common return point.
1183 */
1184 uint32_t const idxReturn = iemNativeMakeLabel(pVCpu, kIemNativeLabelType_Return, off);
1185 AssertReturn(idxReturn != UINT32_MAX, UINT32_MAX);
1186
1187 /* Reposition esp at the r15 restore point. */
1188 pbCodeBuf[off++] = X86_OP_REX_W;
1189 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
1190 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
1191# ifdef RT_OS_WINDOWS
1192 pbCodeBuf[off++] = (uint8_t)(-7 * 8);
1193# else
1194 pbCodeBuf[off++] = (uint8_t)(-5 * 8);
1195# endif
1196
1197 /* Pop non-volatile registers and return */
1198 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
1199 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
1200 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
1201 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
1202 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
1203 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
1204 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
1205 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
1206# ifdef RT_OS_WINDOWS
1207 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
1208 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
1209# endif
1210 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
1211 pbCodeBuf[off++] = 0xc9; /* leave */
1212 pbCodeBuf[off++] = 0xc3; /* ret */
1213 pbCodeBuf[off++] = 0xcc; /* int3 poison */
1214
1215 /*
1216 * Generate the rc + rcPassUp fiddling code if needed.
1217 */
1218 uint32_t idxLabel = iemNativeFindLabel(pVCpu, kIemNativeLabelType_NonZeroRetOrPassUp);
1219 if (idxLabel != UINT32_MAX)
1220 {
1221 Assert(pVCpu->iem.s.Native.paLabels[idxLabel].off == UINT32_MAX);
1222 pVCpu->iem.s.Native.paLabels[idxLabel].off = off;
1223
1224 /* Call helper and jump to return point. */
1225# ifdef RT_OS_WINDOWS
1226 off = iemNativeEmitLoadGprFromGpr(pVCpu, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
1227 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1228 off = iemNativeEmitLoadGprFromGpr(pVCpu, off, X86_GREG_xCX, X86_GREG_xBX);
1229 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1230 off = iemNativeEmitLoadGprFromGpr(pVCpu, off, X86_GREG_xDX, X86_GREG_xAX);
1231 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1232# else
1233 off = iemNativeEmitLoadGprFromGpr(pVCpu, off, X86_GREG_xDI, X86_GREG_xBX);
1234 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1235 off = iemNativeEmitLoadGprFromGpr(pVCpu, off, X86_GREG_xSI, X86_GREG_xAX);
1236 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1237 off = iemNativeEmitLoadGprFromGpr(pVCpu, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
1238 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1239# endif
1240 off = iemNativeEmitLoadGprImm64(pVCpu, off, X86_GREG_xAX, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
1241 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1242
1243 pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 10);
1244 AssertReturn(pbCodeBuf, UINT32_MAX);
1245 pbCodeBuf[off++] = 0xff; /* call rax */
1246 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
1247
1248 /* Jump to common return point. */
1249 uint32_t offRel = pVCpu->iem.s.Native.paLabels[idxReturn].off - (off + 2);
1250 if (-(int32_t)offRel <= 127)
1251 {
1252 pbCodeBuf[off++] = 0xeb; /* jmp rel8 */
1253 pbCodeBuf[off++] = (uint8_t)offRel;
1254 off++;
1255 }
1256 else
1257 {
1258 offRel -= 3;
1259 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */
1260 pbCodeBuf[off++] = RT_BYTE1(offRel);
1261 pbCodeBuf[off++] = RT_BYTE2(offRel);
1262 pbCodeBuf[off++] = RT_BYTE3(offRel);
1263 pbCodeBuf[off++] = RT_BYTE4(offRel);
1264 }
1265 pbCodeBuf[off++] = 0xcc; /* int3 poison */
1266 }
1267
1268#elif RT_ARCH_ARM64
1269 RT_NOREF(pVCpu);
1270 off = UINT32_MAX;
1271
1272#else
1273# error "port me"
1274#endif
1275 return off;
1276}
1277
1278
1279static uint32_t iemNativeEmitProlog(PVMCPUCC pVCpu, uint32_t off)
1280{
1281#ifdef RT_ARCH_AMD64
1282 /*
1283 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
1284 * reserving 64 bytes for stack variables plus 4 non-register argument
1285 * slots. Fixed register assignment: xBX = pVCpu;
1286 *
1287 * Since we always do the same register spilling, we can use the same
1288 * unwind description for all the code.
1289 */
1290 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pVCpu, off, 32);
1291 AssertReturn(pbCodeBuf, UINT32_MAX);
1292 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
1293 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
1294 pbCodeBuf[off++] = 0x8b;
1295 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
1296 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
1297# ifdef RT_OS_WINDOWS
1298 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
1299 pbCodeBuf[off++] = 0x8b;
1300 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
1301 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
1302 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
1303# else
1304 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
1305 pbCodeBuf[off++] = 0x8b;
1306 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
1307# endif
1308 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
1309 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
1310 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
1311 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
1312 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
1313 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
1314 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
1315 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
1316
1317 pbCodeBuf[off++] = X86_OP_REX_W; /* sub rsp, byte imm8 (total computed below) */
1318 pbCodeBuf[off++] = 0x83;
1319 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, X86_GREG_xSP);
1320 pbCodeBuf[off++] = 0x40 /* for variables */
1321 + 8 /* stack alignment correction */
1322 + 4 * 8 /* 4 non-register arguments */
1323# ifdef RT_OS_WINDOWS
1324 + 0x20 /* register argument spill area for windows calling convention */
1325# endif
1326 ;
1327
1328#elif RT_ARCH_ARM64
1329 RT_NOREF(pVCpu);
1330 off = UINT32_MAX;
1331
1332#else
1333# error "port me"
1334#endif
1335 return off;
1336}
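/* Resulting stack frame layout relative to RBP (non-Windows / Windows):
 *      [rbp+08h]   return address
 *      [rbp+00h]   saved rbp
 *      [rbp-08h]   saved rbx
 *      [rbp-10h]   saved r12 / saved rsi
 *      [rbp-18h]   saved r13 / saved rdi
 *      [rbp-20h]   saved r14 / saved r12
 *      [rbp-28h]   saved r15 / saved r13   (r14 at -30h and r15 at -38h on Windows)
 * Below that comes the variable space, alignment correction and argument slots
 * reserved by the sub rsp above; the epilog repositions rsp at the r15 slot
 * before popping the saved registers.
 */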
1337
1338
1339/**
1340 * Recompiles the given threaded TB into a native one.
1341 *
1342 * In case of failure the translation block will be returned as-is.
1343 *
1344 * @returns pTb.
1345 * @param pVCpu The cross context virtual CPU structure of the calling
1346 * thread.
1347 * @param pTb The threaded translation to recompile to native.
1348 */
1349PIEMTB iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
1350{
1351 /*
1352 * The first time thru, we allocate the recompiler state, the other times
1353 * we just need to reset it before using it again.
1354 */
1355 if (RT_LIKELY(pVCpu->iem.s.Native.pInstrBuf))
1356 iemNativeReInit(pVCpu);
1357 else
1358 AssertReturn(iemNativeInit(pVCpu), pTb);
1359
1360 /*
1361 * Emit prolog code (fixed atm).
1362 */
1363 uint32_t off = iemNativeEmitProlog(pVCpu, 0);
1364 AssertReturn(off != UINT32_MAX, pTb);
1365
1366 /*
1367 * Convert the calls to native code.
1368 */
1369 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
1370 uint32_t cCallsLeft = pTb->Thrd.cCalls;
1371 while (cCallsLeft-- > 0)
1372 {
1373 off = iemNativeEmitThreadedCall(pVCpu, off, pCallEntry);
1374 AssertReturn(off != UINT32_MAX, pTb);
1375
1376 pCallEntry++;
1377 }
1378
1379 /*
1380 * Emit the epilog code.
1381 */
1382 off = iemNativeEmitEpilog(pVCpu, off);
1383 AssertReturn(off != UINT32_MAX, pTb);
1384
1385 /*
1386 * Make sure all labels have been defined.
1387 */
1388 PIEMNATIVELABEL const paLabels = pVCpu->iem.s.Native.paLabels;
1389#ifdef VBOX_STRICT
1390 uint32_t const cLabels = pVCpu->iem.s.Native.cLabels;
1391 for (uint32_t i = 0; i < cLabels; i++)
1392 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
1393#endif
1394
1395 /*
1396 * Allocate executable memory, copy over the code we've generated.
1397 */
1398 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1399 if (pTbAllocator->pDelayedFreeHead)
1400 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
1401
1402 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
1403 AssertReturn(paFinalInstrBuf, pTb);
1404 memcpy(paFinalInstrBuf, pVCpu->iem.s.Native.pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
1405
1406 /*
1407 * Apply fixups.
1408 */
1409 PIEMNATIVEFIXUP const paFixups = pVCpu->iem.s.Native.paFixups;
1410 uint32_t const cFixups = pVCpu->iem.s.Native.cFixups;
1411 for (uint32_t i = 0; i < cFixups; i++)
1412 {
1413 Assert(paFixups[i].off < off);
1414 Assert(paFixups[i].idxLabel < cLabels);
1415 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
1416 switch (paFixups[i].enmType)
1417 {
1418#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1419 case kIemNativeFixupType_Rel32:
1420 Assert(paFixups[i].off + 4 <= off);
1421 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
1422 continue;
1423
1424#elif defined(RT_ARCH_ARM64)
1425#endif
1426 case kIemNativeFixupType_Invalid:
1427 case kIemNativeFixupType_End:
1428 break;
1429 }
1430 AssertFailed();
1431 }
1432
1433 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
1434
1435 /*
1436 * Convert the translation block.
1437 */
1438 //RT_BREAKPOINT();
1439 RTMemFree(pTb->Thrd.paCalls);
1440 pTb->Native.paInstructions = paFinalInstrBuf;
1441 pTb->Native.cInstructions = off;
1442 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
1443
1444 Assert(pTbAllocator->cThreadedTbs > 0);
1445 pTbAllocator->cThreadedTbs -= 1;
1446 pTbAllocator->cNativeTbs += 1;
1447 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
1448
1449 return pTb;
1450}
1451