VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@101912

Last change on this file since 101912 was 101912, checked in by vboxsync, 17 months ago

VMM/IEM: doxygen fix. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 293.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 101912 2023-11-07 01:33:15Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMNativeFunctions.h"
102
103
104/*
105 * Narrow down configs here to avoid wasting time on unused configs.
106 * Note! Same checks in IEMAllThrdRecompiler.cpp.
107 */
108
109#ifndef IEM_WITH_CODE_TLB
110# error The code TLB must be enabled for the recompiler.
111#endif
112
113#ifndef IEM_WITH_DATA_TLB
114# error The data TLB must be enabled for the recompiler.
115#endif
116
117#ifndef IEM_WITH_SETJMP
118# error The setjmp approach must be enabled for the recompiler.
119#endif
120
121/** @todo eliminate this clang build hack. */
122#if RT_CLANG_PREREQ(4, 0)
123# pragma GCC diagnostic ignored "-Wunused-function"
124#endif
125
126
127
128/*********************************************************************************************************************************
129* Defined Constants And Macros *
130*********************************************************************************************************************************/
131/** Always count instructions for now. */
132#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
133
134
135/*********************************************************************************************************************************
136* Internal Functions *
137*********************************************************************************************************************************/
138#ifdef VBOX_STRICT
139static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
140 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
141#endif
142#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
143static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
144static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
145#endif
146
147
148/*********************************************************************************************************************************
149* Executable Memory Allocator *
150*********************************************************************************************************************************/
151/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
152 * Use an alternative chunk sub-allocator that does not store internal data
153 * in the chunk.
154 *
155 * Using the RTHeapSimple is not practical on newer darwin systems where
156 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
157 * memory. We would have to change the protection of the whole chunk for
158 * every call to RTHeapSimple, which would be rather expensive.
159 *
160 * This alternative implementation lets us restrict page protection modifications
161 * to the pages backing the executable memory we just allocated.
162 */
163#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
164/** The chunk sub-allocation unit size in bytes. */
165#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
166/** The chunk sub-allocation unit size as a shift factor. */
167#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
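/* Illustrative note (hypothetical numbers, not from the sources): with 128 byte
 * units (shift factor 7), a 300 byte request is rounded up to 384 bytes, i.e.
 * (300 + 127) >> 7 = 3 allocation units / 3 bits in the chunk bitmap. */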
168
169#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
170# ifdef IEMNATIVE_USE_GDB_JIT
171# define IEMNATIVE_USE_GDB_JIT_ET_DYN
172
173/** GDB JIT: Code entry. */
174typedef struct GDBJITCODEENTRY
175{
176 struct GDBJITCODEENTRY *pNext;
177 struct GDBJITCODEENTRY *pPrev;
178 uint8_t *pbSymFile;
179 uint64_t cbSymFile;
180} GDBJITCODEENTRY;
181
182/** GDB JIT: Actions. */
183typedef enum GDBJITACTIONS : uint32_t
184{
185 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
186} GDBJITACTIONS;
187
188/** GDB JIT: Descriptor. */
189typedef struct GDBJITDESCRIPTOR
190{
191 uint32_t uVersion;
192 GDBJITACTIONS enmAction;
193 GDBJITCODEENTRY *pRelevant;
194 GDBJITCODEENTRY *pHead;
195 /** Our addition: */
196 GDBJITCODEENTRY *pTail;
197} GDBJITDESCRIPTOR;
198
199/** GDB JIT: Our simple symbol file data. */
200typedef struct GDBJITSYMFILE
201{
202 Elf64_Ehdr EHdr;
203# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
204 Elf64_Shdr aShdrs[5];
205# else
206 Elf64_Shdr aShdrs[7];
207 Elf64_Phdr aPhdrs[2];
208# endif
209 /** The dwarf ehframe data for the chunk. */
210 uint8_t abEhFrame[512];
211 char szzStrTab[128];
212 Elf64_Sym aSymbols[3];
213# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
214 Elf64_Sym aDynSyms[2];
215 Elf64_Dyn aDyn[6];
216# endif
217} GDBJITSYMFILE;
218
219extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
220extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
221
222/** Init once for g_IemNativeGdbJitLock. */
223static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
224/** Critical section protecting the GDB JIT registration (init via g_IemNativeGdbJitOnce). */
225static RTCRITSECT g_IemNativeGdbJitLock;
226
227/** GDB reads the info here. */
228GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
229
230/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
231DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
232{
233 ASMNopPause();
234}
235
236/** @callback_method_impl{FNRTONCE} */
237static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
238{
239 RT_NOREF(pvUser);
240 return RTCritSectInit(&g_IemNativeGdbJitLock);
241}
242
243
244# endif /* IEMNATIVE_USE_GDB_JIT */
245
246/**
247 * Per-chunk unwind info for non-windows hosts.
248 */
249typedef struct IEMEXECMEMCHUNKEHFRAME
250{
251# ifdef IEMNATIVE_USE_LIBUNWIND
252 /** The offset of the FDE into abEhFrame. */
253 uintptr_t offFda;
254# else
255 /** 'struct object' storage area. */
256 uint8_t abObject[1024];
257# endif
258# ifdef IEMNATIVE_USE_GDB_JIT
259# if 0
260 /** The GDB JIT 'symbol file' data. */
261 GDBJITSYMFILE GdbJitSymFile;
262# endif
263 /** The GDB JIT list entry. */
264 GDBJITCODEENTRY GdbJitEntry;
265# endif
266 /** The dwarf ehframe data for the chunk. */
267 uint8_t abEhFrame[512];
268} IEMEXECMEMCHUNKEHFRAME;
269/** Pointer to per-chunk unwind info for non-windows hosts. */
270typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
271#endif
272
273
274/**
275 * A chunk of executable memory.
276 */
277typedef struct IEMEXECMEMCHUNK
278{
279#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
280 /** Number of free items in this chunk. */
281 uint32_t cFreeUnits;
282 /** Hint where to start searching for free space in the allocation bitmap. */
283 uint32_t idxFreeHint;
284#else
285 /** The heap handle. */
286 RTHEAPSIMPLE hHeap;
287#endif
288 /** Pointer to the chunk. */
289 void *pvChunk;
290#ifdef IN_RING3
291 /**
292 * Pointer to the unwind information.
293 *
294 * This is used during C++ throw and longjmp (windows and probably most other
295 * platforms). Some debuggers (windbg) make use of it as well.
296 *
297 * Windows: This is allocated from hHeap on windows because (at least for
298 * AMD64) the UNWIND_INFO structure address in the
299 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
300 *
301 * Others: Allocated from the regular heap to avoid unnecessary executable data
302 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
303 void *pvUnwindInfo;
304#elif defined(IN_RING0)
305 /** Allocation handle. */
306 RTR0MEMOBJ hMemObj;
307#endif
308} IEMEXECMEMCHUNK;
309/** Pointer to a memory chunk. */
310typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
311
312
313/**
314 * Executable memory allocator for the native recompiler.
315 */
316typedef struct IEMEXECMEMALLOCATOR
317{
318 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
319 uint32_t uMagic;
320
321 /** The chunk size. */
322 uint32_t cbChunk;
323 /** The maximum number of chunks. */
324 uint32_t cMaxChunks;
325 /** The current number of chunks. */
326 uint32_t cChunks;
327 /** Hint where to start looking for available memory. */
328 uint32_t idxChunkHint;
329 /** Statistics: Current number of allocations. */
330 uint32_t cAllocations;
331
332 /** The total amount of memory available. */
333 uint64_t cbTotal;
334 /** Total amount of free memory. */
335 uint64_t cbFree;
336 /** Total amount of memory allocated. */
337 uint64_t cbAllocated;
338
339#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
340 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
341 *
342 * Since the chunk size is a power of two and the minimum chunk size is a lot
343 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
344 * require a whole number of uint64_t elements in the allocation bitmap. So,
345 * for the sake of simplicity/laziness, they are allocated as one contiguous
346 * block. */
347 uint64_t *pbmAlloc;
348 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
349 uint32_t cUnitsPerChunk;
350 /** Number of bitmap elements per chunk (for quickly locating the bitmap
351 * portion corresponding to a chunk). */
352 uint32_t cBitmapElementsPerChunk;
353#else
354 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
355 * @{ */
356 /** The size of the heap internal block header. This is used to adjust the
357 * requested memory size to make sure there is exactly enough room for a header at
358 * the end of the blocks we allocate before the next 64 byte alignment line. */
359 uint32_t cbHeapBlockHdr;
360 /** The size of the initial heap allocation required to make sure the first
361 * allocation is correctly aligned. */
362 uint32_t cbHeapAlignTweak;
363 /** The alignment tweak allocation address. */
364 void *pvAlignTweak;
365 /** @} */
366#endif
367
368#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
369 /** Pointer to the array of unwind info running parallel to aChunks (same
370 * allocation as this structure, located after the bitmaps).
371 * (For Windows, the structures must reside in 32-bit RVA distance to the
372 * actual chunk, so they are allocated off the chunk.) */
373 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
374#endif
375
376 /** The allocation chunks. */
377 RT_FLEXIBLE_ARRAY_EXTENSION
378 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
379} IEMEXECMEMALLOCATOR;
380/** Pointer to an executable memory allocator. */
381typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
382
383/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
384#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
385
386
387static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
388
389
390/**
391 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
392 * the heap statistics.
393 */
394static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
395 uint32_t cbReq, uint32_t idxChunk)
396{
397 pExecMemAllocator->cAllocations += 1;
398 pExecMemAllocator->cbAllocated += cbReq;
399#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
400 pExecMemAllocator->cbFree -= cbReq;
401#else
402 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
403#endif
404 pExecMemAllocator->idxChunkHint = idxChunk;
405
406#ifdef RT_OS_DARWIN
407 /*
408 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
409 * on darwin. So, we mark the pages returned as read+write after alloc and
410 * expect the caller to call iemExecMemAllocatorReadyForUse when done
411 * writing to the allocation.
412 *
413 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
414 * for details.
415 */
416 /** @todo detect if this is necessary... it wasn't required on 10.15 or
417 * whatever older version it was. */
418 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
419 AssertRC(rc);
420#endif
421
422 return pvRet;
423}
424
425
426#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
427static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
428 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
429{
430 /*
431 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
432 */
433 Assert(!(cToScan & 63));
434 Assert(!(idxFirst & 63));
435 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
436 pbmAlloc += idxFirst / 64;
437
438 /*
439 * Scan the bitmap for cReqUnits consecutive clear bits
440 */
441 /** @todo This can probably be done more efficiently for non-x86 systems. */
442 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
443 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
444 {
445 uint32_t idxAddBit = 1;
446 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
447 idxAddBit++;
448 if (idxAddBit >= cReqUnits)
449 {
450 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
451
452 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
453 pChunk->cFreeUnits -= cReqUnits;
454 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
455
456 void * const pvRet = (uint8_t *)pChunk->pvChunk
457 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
458
459 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
460 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
461 }
462
463 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
464 }
465 return NULL;
466}
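/* Illustrative walk-through (hypothetical bitmap contents, not from the sources):
 * with bits 0..7 = 1,1,0,0,1,0,0,0 (1 = allocated) and cReqUnits = 3, the scan
 * above first lands on bit 2, finds only two clear bits (2..3) before the set
 * bit 4, resumes at the next clear bit (5) and succeeds with bits 5..7, marking
 * them allocated and returning the corresponding 3 * 128 byte sub-block. */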
467#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
468
469
470static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
471{
472#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
473 /*
474 * Figure out how much to allocate.
475 */
476 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
477 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
478 {
479 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
480 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
481 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
482 {
483 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
484 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
485 if (pvRet)
486 return pvRet;
487 }
488 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
489 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
490 cReqUnits, idxChunk);
491 }
492#else
493 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
494 if (pvRet)
495 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
496#endif
497 return NULL;
498
499}
500
501
502/**
503 * Allocates @a cbReq bytes of executable memory.
504 *
505 * @returns Pointer to the memory, NULL if out of memory or other problem
506 * encountered.
507 * @param pVCpu The cross context virtual CPU structure of the calling
508 * thread.
509 * @param cbReq How many bytes are required.
510 */
511static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
512{
513 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
514 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
515 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
516
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means to follow the logic described
521 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * For the alternative one we just align it up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /* What now? Prune native translation blocks from the cache? */
565 AssertFailed();
566 return NULL;
567}
568
569
570/** This is a hook that we may need later for changing memory protection back
571 * to readonly+exec */
572static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
573{
574#ifdef RT_OS_DARWIN
575 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
576 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
577 AssertRC(rc); RT_NOREF(pVCpu);
578
579 /*
580 * Flush the instruction cache:
581 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
582 */
583 /* sys_dcache_flush(pv, cb); - not necessary */
584 sys_icache_invalidate(pv, cb);
585#else
586 RT_NOREF(pVCpu, pv, cb);
587#endif
588}
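/* Sketch of the intended W^X usage pattern on darwin (pbCode/cbCode are
 * illustrative names, not identifiers from this file):
 *     void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode);     // pages left read+write
 *     memcpy(pv, pbCode, cbCode);                             // emit the native code
 *     iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);      // flip to read+exec, flush icache
 * On other hosts the second call is a no-op, so callers can use it unconditionally. */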
589
590
591/**
592 * Frees executable memory.
593 */
594void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
595{
596 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
597 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
598 Assert(pv);
599#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
600 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
601#else
602 Assert(!((uintptr_t)pv & 63));
603#endif
604
605 /* Align the size as we did when allocating the block. */
606#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
607 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
608#else
609 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
610#endif
611
612 /* Free it / assert sanity. */
613#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
614 uint32_t const cChunks = pExecMemAllocator->cChunks;
615 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
616 bool fFound = false;
617 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
618 {
619 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
620 fFound = offChunk < cbChunk;
621 if (fFound)
622 {
623#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
624 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
625 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
626
627 /* Check that it's valid and free it. */
628 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
629 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
630 for (uint32_t i = 1; i < cReqUnits; i++)
631 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
632 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
633
634 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
635 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
636
637 /* Update the stats. */
638 pExecMemAllocator->cbAllocated -= cb;
639 pExecMemAllocator->cbFree += cb;
640 pExecMemAllocator->cAllocations -= 1;
641 return;
642#else
643 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
644 break;
645#endif
646 }
647 }
648# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
649 AssertFailed();
650# else
651 Assert(fFound);
652# endif
653#endif
654
655#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
656 /* Update stats while cb is freshly calculated.*/
657 pExecMemAllocator->cbAllocated -= cb;
658 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
659 pExecMemAllocator->cAllocations -= 1;
660
661 /* Free it. */
662 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
663#endif
664}
665
666
667
668#ifdef IN_RING3
669# ifdef RT_OS_WINDOWS
670
671/**
672 * Initializes the unwind info structures for windows hosts.
673 */
674static int
675iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
676 void *pvChunk, uint32_t idxChunk)
677{
678 RT_NOREF(pVCpu);
679
680 /*
681 * The AMD64 unwind opcodes.
682 *
683 * This is a program that starts with RSP after a RET instruction that
684 * ends up in recompiled code, and the operations we describe here will
685 * restore all non-volatile registers and bring RSP back to where our
686 * RET address is. This means it's reverse order from what happens in
687 * the prologue.
688 *
689 * Note! Using a frame register approach here, partly because we have one
690 * but mainly because the UWOP_ALLOC_LARGE argument values
691 * would be a pain to write initializers for. On the positive
692 * side, we're impervious to changes in the stack variable
693 * area and can deal with dynamic stack allocations if necessary.
694 */
695 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
696 {
697 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 0x10 (0x60) */
698 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
699 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
700 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
701 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
702 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
703 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
704 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
705 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
706 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
707 };
708 union
709 {
710 IMAGE_UNWIND_INFO Info;
711 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
712 } s_UnwindInfo =
713 {
714 {
715 /* .Version = */ 1,
716 /* .Flags = */ 0,
717 /* .SizeOfProlog = */ 16, /* whatever */
718 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
719 /* .FrameRegister = */ X86_GREG_xBP,
720 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
721 }
722 };
723 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
724 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
725
726 /*
727 * Calc how much space we need and allocate it off the exec heap.
728 */
729 unsigned const cFunctionEntries = 1;
730 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
731 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
732# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
733 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
734 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
735 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
736# else
737 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
738 - pExecMemAllocator->cbHeapBlockHdr;
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
740 32 /*cbAlignment*/);
741# endif
742 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
743 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
744
745 /*
746 * Initialize the structures.
747 */
748 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
749
750 paFunctions[0].BeginAddress = 0;
751 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
752 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
753
754 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
755 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
756
757 /*
758 * Register it.
759 */
760 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
761 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
762
763 return VINF_SUCCESS;
764}
765
766
767# else /* !RT_OS_WINDOWS */
768
769/**
770 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
771 */
772DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
773{
774 if (iValue >= 64)
775 {
776 Assert(iValue < 0x2000);
777 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
778 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
779 }
780 else if (iValue >= 0)
781 *Ptr.pb++ = (uint8_t)iValue;
782 else if (iValue > -64)
783 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
784 else
785 {
786 Assert(iValue > -0x2000);
787 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
788 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
789 }
790 return Ptr;
791}
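/* Worked examples (assuming the limited range documented above): -8, the data
 * alignment factor used in the CIE below, encodes as the single byte 0x78;
 * +100 needs two bytes, 0xe4 0x00, since bit 6 of a one-byte encoding would
 * otherwise be taken as the sign bit. */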
792
793
794/**
795 * Emits an ULEB128 encoded value (up to 64-bit wide).
796 */
797DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
798{
799 while (uValue >= 0x80)
800 {
801 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
802 uValue >>= 7;
803 }
804 *Ptr.pb++ = (uint8_t)uValue;
805 return Ptr;
806}
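/* Worked example: values below 0x80 are emitted as a single byte, while e.g.
 * 0x12345 becomes the three bytes 0xc5 0xc6 0x04 (7 data bits per byte, least
 * significant group first, high bit set on all but the last byte). */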
807
808
809/**
810 * Emits a CFA rule as register @a uReg + offset @a off.
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
813{
814 *Ptr.pb++ = DW_CFA_def_cfa;
815 Ptr = iemDwarfPutUleb128(Ptr, uReg);
816 Ptr = iemDwarfPutUleb128(Ptr, off);
817 return Ptr;
818}
819
820
821/**
822 * Emits a register (@a uReg) save location:
823 * CFA + @a off * data_alignment_factor
824 */
825DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
826{
827 if (uReg < 0x40)
828 *Ptr.pb++ = DW_CFA_offset | uReg;
829 else
830 {
831 *Ptr.pb++ = DW_CFA_offset_extended;
832 Ptr = iemDwarfPutUleb128(Ptr, uReg);
833 }
834 Ptr = iemDwarfPutUleb128(Ptr, off);
835 return Ptr;
836}
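/* Illustrative encoding (assuming the standard AMD64 DWARF register numbering,
 * where RBP is register 6): iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits
 * the bytes 0x86 0x02, i.e. DW_CFA_offset for register 6 with factored offset 2,
 * which with the -8 data alignment factor below means "RBP saved at CFA - 16". */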
837
838
839# if 0 /* unused */
840/**
841 * Emits a register (@a uReg) save location, using signed offset:
842 * CFA + @a offSigned * data_alignment_factor
843 */
844DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
845{
846 *Ptr.pb++ = DW_CFA_offset_extended_sf;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
849 return Ptr;
850}
851# endif
852
853
854/**
855 * Initializes the unwind info section for non-windows hosts.
856 */
857static int
858iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
859 void *pvChunk, uint32_t idxChunk)
860{
861 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
862 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
863
864 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
865
866 /*
867 * Generate the CIE first.
868 */
869# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
870 uint8_t const iDwarfVer = 3;
871# else
872 uint8_t const iDwarfVer = 4;
873# endif
874 RTPTRUNION const PtrCie = Ptr;
875 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
876 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
877 *Ptr.pb++ = iDwarfVer; /* DWARF version */
878 *Ptr.pb++ = 0; /* Augmentation. */
879 if (iDwarfVer >= 4)
880 {
881 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
882 *Ptr.pb++ = 0; /* Segment selector size. */
883 }
884# ifdef RT_ARCH_AMD64
885 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
886# else
887 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
888# endif
889 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
890# ifdef RT_ARCH_AMD64
891 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
892# elif defined(RT_ARCH_ARM64)
893 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
894# else
895# error "port me"
896# endif
897 /* Initial instructions: */
898# ifdef RT_ARCH_AMD64
899 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
907# elif defined(RT_ARCH_ARM64)
908# if 1
909 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
910# else
911 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
912# endif
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
925 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
926 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
927# else
928# error "port me"
929# endif
930 while ((Ptr.u - PtrCie.u) & 3)
931 *Ptr.pb++ = DW_CFA_nop;
932 /* Finalize the CIE size. */
933 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
934
935 /*
936 * Generate an FDE for the whole chunk area.
937 */
938# ifdef IEMNATIVE_USE_LIBUNWIND
939 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
940# endif
941 RTPTRUNION const PtrFde = Ptr;
942 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
943 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
944 Ptr.pu32++;
945 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
946 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
947# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
948 *Ptr.pb++ = DW_CFA_nop;
949# endif
950 while ((Ptr.u - PtrFde.u) & 3)
951 *Ptr.pb++ = DW_CFA_nop;
952 /* Finalize the FDE size. */
953 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
954
955 /* Terminator entry. */
956 *Ptr.pu32++ = 0;
957 *Ptr.pu32++ = 0; /* just to be sure... */
958 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
959
960 /*
961 * Register it.
962 */
963# ifdef IEMNATIVE_USE_LIBUNWIND
964 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
965# else
966 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
967 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
968# endif
969
970# ifdef IEMNATIVE_USE_GDB_JIT
971 /*
972 * Now for telling GDB about this (experimental).
973 *
974 * This seems to work best with ET_DYN.
975 */
976 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
977# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
978 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
979 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
980# else
981 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
982 - pExecMemAllocator->cbHeapBlockHdr;
983 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
984# endif
985 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
986 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
987
988 RT_ZERO(*pSymFile);
989
990 /*
991 * The ELF header:
992 */
993 pSymFile->EHdr.e_ident[0] = ELFMAG0;
994 pSymFile->EHdr.e_ident[1] = ELFMAG1;
995 pSymFile->EHdr.e_ident[2] = ELFMAG2;
996 pSymFile->EHdr.e_ident[3] = ELFMAG3;
997 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
998 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
999 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1000 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1001# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1002 pSymFile->EHdr.e_type = ET_DYN;
1003# else
1004 pSymFile->EHdr.e_type = ET_REL;
1005# endif
1006# ifdef RT_ARCH_AMD64
1007 pSymFile->EHdr.e_machine = EM_AMD64;
1008# elif defined(RT_ARCH_ARM64)
1009 pSymFile->EHdr.e_machine = EM_AARCH64;
1010# else
1011# error "port me"
1012# endif
1013 pSymFile->EHdr.e_version = 1; /*?*/
1014 pSymFile->EHdr.e_entry = 0;
1015# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1016 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1017# else
1018 pSymFile->EHdr.e_phoff = 0;
1019# endif
1020 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1021 pSymFile->EHdr.e_flags = 0;
1022 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1023# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1024 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1025 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phentsize = 0;
1028 pSymFile->EHdr.e_phnum = 0;
1029# endif
1030 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1031 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1032 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1033
1034 uint32_t offStrTab = 0;
1035#define APPEND_STR(a_szStr) do { \
1036 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1037 offStrTab += sizeof(a_szStr); \
1038 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1039 } while (0)
1040#define APPEND_STR_FMT(a_szStr, ...) do { \
1041 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1042 offStrTab++; \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045
1046 /*
1047 * Section headers.
1048 */
1049 /* Section header #0: NULL */
1050 unsigned i = 0;
1051 APPEND_STR("");
1052 RT_ZERO(pSymFile->aShdrs[i]);
1053 i++;
1054
1055 /* Section header: .eh_frame */
1056 pSymFile->aShdrs[i].sh_name = offStrTab;
1057 APPEND_STR(".eh_frame");
1058 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1059 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1060# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1061 pSymFile->aShdrs[i].sh_offset
1062 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1063# else
1064 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1065 pSymFile->aShdrs[i].sh_offset = 0;
1066# endif
1067
1068 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1069 pSymFile->aShdrs[i].sh_link = 0;
1070 pSymFile->aShdrs[i].sh_info = 0;
1071 pSymFile->aShdrs[i].sh_addralign = 1;
1072 pSymFile->aShdrs[i].sh_entsize = 0;
1073 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1074 i++;
1075
1076 /* Section header: .shstrtab */
1077 unsigned const iShStrTab = i;
1078 pSymFile->EHdr.e_shstrndx = iShStrTab;
1079 pSymFile->aShdrs[i].sh_name = offStrTab;
1080 APPEND_STR(".shstrtab");
1081 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1082 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1083# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1084 pSymFile->aShdrs[i].sh_offset
1085 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1086# else
1087 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1088 pSymFile->aShdrs[i].sh_offset = 0;
1089# endif
1090 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1091 pSymFile->aShdrs[i].sh_link = 0;
1092 pSymFile->aShdrs[i].sh_info = 0;
1093 pSymFile->aShdrs[i].sh_addralign = 1;
1094 pSymFile->aShdrs[i].sh_entsize = 0;
1095 i++;
1096
1097 /* Section header: .symtab */
1098 pSymFile->aShdrs[i].sh_name = offStrTab;
1099 APPEND_STR(".symtab");
1100 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1101 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1102 pSymFile->aShdrs[i].sh_offset
1103 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1104 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1105 pSymFile->aShdrs[i].sh_link = iShStrTab;
1106 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1107 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1108 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1109 i++;
1110
1111# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1112 /* Section header: .dynsym */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".dynsym");
1115 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1124 i++;
1125# endif
1126
1127# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1128 /* Section header: .dynamic */
1129 pSymFile->aShdrs[i].sh_name = offStrTab;
1130 APPEND_STR(".dynamic");
1131 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1132 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1133 pSymFile->aShdrs[i].sh_offset
1134 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1135 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1136 pSymFile->aShdrs[i].sh_link = iShStrTab;
1137 pSymFile->aShdrs[i].sh_info = 0;
1138 pSymFile->aShdrs[i].sh_addralign = 1;
1139 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1140 i++;
1141# endif
1142
1143 /* Section header: .text */
1144 unsigned const iShText = i;
1145 pSymFile->aShdrs[i].sh_name = offStrTab;
1146 APPEND_STR(".text");
1147 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1148 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1149# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1150 pSymFile->aShdrs[i].sh_offset
1151 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1152# else
1153 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1154 pSymFile->aShdrs[i].sh_offset = 0;
1155# endif
1156 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1157 pSymFile->aShdrs[i].sh_link = 0;
1158 pSymFile->aShdrs[i].sh_info = 0;
1159 pSymFile->aShdrs[i].sh_addralign = 1;
1160 pSymFile->aShdrs[i].sh_entsize = 0;
1161 i++;
1162
1163 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1164
1165# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1166 /*
1167 * The program headers:
1168 */
1169 /* Everything in a single LOAD segment: */
1170 i = 0;
1171 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1172 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1173 pSymFile->aPhdrs[i].p_offset
1174 = pSymFile->aPhdrs[i].p_vaddr
1175 = pSymFile->aPhdrs[i].p_paddr = 0;
1176 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1177 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1178 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1179 i++;
1180 /* The .dynamic segment. */
1181 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1182 pSymFile->aPhdrs[i].p_flags = PF_R;
1183 pSymFile->aPhdrs[i].p_offset
1184 = pSymFile->aPhdrs[i].p_vaddr
1185 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1186 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1187 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1188 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1189 i++;
1190
1191 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1192
1193 /*
1194 * The dynamic section:
1195 */
1196 i = 0;
1197 pSymFile->aDyn[i].d_tag = DT_SONAME;
1198 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1199 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1200 i++;
1201 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1202 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1205 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1206 i++;
1207 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1208 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1211 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_NULL;
1214 i++;
1215 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1216# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1217
1218 /*
1219 * Symbol tables:
1220 */
1221 /** @todo gdb doesn't seem to really like this ... */
1222 i = 0;
1223 pSymFile->aSymbols[i].st_name = 0;
1224 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1225 pSymFile->aSymbols[i].st_value = 0;
1226 pSymFile->aSymbols[i].st_size = 0;
1227 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1228 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1229# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1230 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1231# endif
1232 i++;
1233
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240 i++;
1241
1242 pSymFile->aSymbols[i].st_name = offStrTab;
1243 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1244# if 0
1245 pSymFile->aSymbols[i].st_shndx = iShText;
1246 pSymFile->aSymbols[i].st_value = 0;
1247# else
1248 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1249 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1250# endif
1251 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1252 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1253 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1254# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1255 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1256 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1257# endif
1258 i++;
1259
1260 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1261 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1262
1263 /*
1264 * The GDB JIT entry and informing GDB.
1265 */
1266 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1267# if 1
1268 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1269# else
1270 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1271# endif
1272
1273 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1274 RTCritSectEnter(&g_IemNativeGdbJitLock);
1275 pEhFrame->GdbJitEntry.pNext = NULL;
1276 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1277 if (__jit_debug_descriptor.pTail)
1278 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1279 else
1280 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1281 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1282 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1283
1284 /* Notify GDB: */
1285 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1286 __jit_debug_register_code();
1287 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1288 RTCritSectLeave(&g_IemNativeGdbJitLock);
1289
1290# else /* !IEMNATIVE_USE_GDB_JIT */
1291 RT_NOREF(pVCpu);
1292# endif /* !IEMNATIVE_USE_GDB_JIT */
1293
1294 return VINF_SUCCESS;
1295}
1296
1297# endif /* !RT_OS_WINDOWS */
1298#endif /* IN_RING3 */
1299
1300
1301/**
1302 * Adds another chunk to the executable memory allocator.
1303 *
1304 * This is used by the init code for the initial allocation and later by the
1305 * regular allocator function when it's out of memory.
1306 */
1307static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1308{
1309 /* Check that we've room for growth. */
1310 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1311 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1312
1313 /* Allocate a chunk. */
1314#ifdef RT_OS_DARWIN
1315 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1316#else
1317 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1318#endif
1319 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1320
1321#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1322 int rc = VINF_SUCCESS;
1323#else
1324 /* Initialize the heap for the chunk. */
1325 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1326 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1327 AssertRC(rc);
1328 if (RT_SUCCESS(rc))
1329 {
1330 /*
1331 * We want the memory to be aligned on 64 bytes, so the first time thru
1332 * here we do some exploratory allocations to see how we can achieve this.
1333 * On subsequent runs we only make an initial adjustment allocation, if
1334 * necessary.
1335 *
1336 * Since we own the heap implementation, we know that the internal block
1337 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1338 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1339 * to the size, align up by 64 bytes, and subtract 32 bytes.
1340 *
1341 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1342 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1343 * allocation to force subsequent allocations to return 64 byte aligned
1344 * user areas.
1345 */
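        /* Worked example of the size adjustment (hypothetical numbers): for a
         * 256 byte request, RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes are
         * requested from the heap; a 64 byte aligned user area plus those 288
         * bytes plus the next block's 32 byte header then ends exactly on a
         * 64 byte line, so the following user area is 64 byte aligned again. */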
1346 if (!pExecMemAllocator->cbHeapBlockHdr)
1347 {
1348 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1349 pExecMemAllocator->cbHeapAlignTweak = 64;
1350 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1351 32 /*cbAlignment*/);
1352 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1353
1354 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1355 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1356 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1357 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1358 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1359
1360 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1361 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1362 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1363 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1364 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1365
1366 RTHeapSimpleFree(hHeap, pvTest2);
1367 RTHeapSimpleFree(hHeap, pvTest1);
1368 }
1369 else
1370 {
1371 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1372 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1373 }
1374 if (RT_SUCCESS(rc))
1375#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1376 {
1377 /*
1378 * Add the chunk.
1379 *
1380 * This must be done before the unwind init so windows can allocate
1381 * memory from the chunk when using the alternative sub-allocator.
1382 */
1383 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1384#ifdef IN_RING3
1385 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1386#endif
1387#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1388 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1389#else
1390 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1391 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1392 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1393 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1394#endif
1395
1396 pExecMemAllocator->cChunks = idxChunk + 1;
1397 pExecMemAllocator->idxChunkHint = idxChunk;
1398
1399#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1400 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1401 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1402#else
1403 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1404 pExecMemAllocator->cbTotal += cbFree;
1405 pExecMemAllocator->cbFree += cbFree;
1406#endif
1407
1408#ifdef IN_RING3
1409 /*
1410 * Initialize the unwind information (this cannot really fail atm).
1411 * (This sets pvUnwindInfo.)
1412 */
1413 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1414 if (RT_SUCCESS(rc))
1415#endif
1416 {
1417 return VINF_SUCCESS;
1418 }
1419
1420#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1421 /* Just in case the impossible happens, undo the above: */
1422 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1423 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1424 pExecMemAllocator->cChunks = idxChunk;
1425 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1426 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1427 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1428 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1429#endif
1430 }
1431#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 }
1433#endif
1434 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1435 RT_NOREF(pVCpu);
1436 return rc;
1437}
1438
1439
1440/**
1441 * Initializes the executable memory allocator for native recompilation on the
1442 * calling EMT.
1443 *
1444 * @returns VBox status code.
1445 * @param pVCpu The cross context virtual CPU structure of the calling
1446 * thread.
1447 * @param cbMax The max size of the allocator.
1448 * @param cbInitial The initial allocator size.
1449 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1450 * dependent).
1451 */
1452int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1453{
1454 /*
1455 * Validate input.
1456 */
1457 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1458 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1459 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1460 || cbChunk == 0
1461 || ( RT_IS_POWER_OF_TWO(cbChunk)
1462 && cbChunk >= _1M
1463 && cbChunk <= _256M
1464 && cbChunk <= cbMax),
1465 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1466 VERR_OUT_OF_RANGE);
1467
1468 /*
1469 * Adjust/figure out the chunk size.
1470 */
1471 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1472 {
1473 if (cbMax >= _256M)
1474 cbChunk = _64M;
1475 else
1476 {
1477 if (cbMax < _16M)
1478 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1479 else
1480 cbChunk = (uint32_t)cbMax / 4;
1481 if (!RT_IS_POWER_OF_TWO(cbChunk))
1482 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1483 }
1484 }
1485
1486 if (cbChunk > cbMax)
1487 cbMax = cbChunk;
1488 else
1489 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1490 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1491 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1492
1493 /*
1494 * Allocate and initialize the allocator instance.
1495 */
1496 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1497#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1498 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1499 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1500 cbNeeded += cbBitmap * cMaxChunks;
1501 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1502 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1503#endif
1504#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1505 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1506 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1507#endif
1508 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1509 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1510 VERR_NO_MEMORY);
1511 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1512 pExecMemAllocator->cbChunk = cbChunk;
1513 pExecMemAllocator->cMaxChunks = cMaxChunks;
1514 pExecMemAllocator->cChunks = 0;
1515 pExecMemAllocator->idxChunkHint = 0;
1516 pExecMemAllocator->cAllocations = 0;
1517 pExecMemAllocator->cbTotal = 0;
1518 pExecMemAllocator->cbFree = 0;
1519 pExecMemAllocator->cbAllocated = 0;
1520#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1521 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1522 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1523 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1524 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1525#endif
1526#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1527 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1528#endif
1529 for (uint32_t i = 0; i < cMaxChunks; i++)
1530 {
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1533 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1534#else
1535 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1536#endif
1537 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1538#ifdef IN_RING0
1539 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1540#else
1541 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1542#endif
1543 }
1544 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1545
1546 /*
1547 * Do the initial allocations.
1548 */
1549 while (cbInitial >= (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1550 {
1551 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1552 AssertLogRelRCReturn(rc, rc);
1553 }
1554
1555 pExecMemAllocator->idxChunkHint = 0;
1556
1557 return VINF_SUCCESS;
1558}
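
/*
 * Worked example of the sizing logic above (figures picked purely for
 * illustration): with cbMax = 40 MiB and cbChunk = 0, the 16M..256M branch
 * picks cbMax / 4 = 10 MiB, which is not a power of two and therefore gets
 * rounded up to 16 MiB by the RT_BIT_32(ASMBitLastSetU32()) step.  cbMax is
 * then rounded up to a whole number of chunks, i.e. 48 MiB, so cMaxChunks = 3.
 */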
1559
1560
1561/*********************************************************************************************************************************
1562* Native Recompilation *
1563*********************************************************************************************************************************/
1564
1565
1566/**
1567 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1568 */
1569IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1570{
1571 pVCpu->iem.s.cInstructions += idxInstr;
1572 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1573}
1574
1575
1576/**
1577 * Used by TB code when it wants to raise a \#GP(0).
1578 */
1579IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1580{
1581 pVCpu->iem.s.cInstructions += idxInstr;
1582 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1583#ifndef _MSC_VER
1584 return VINF_IEM_RAISED_XCPT; /* not reached */
1585#endif
1586}
1587
1588
1589/**
1590 * Reinitializes the native recompiler state.
1591 *
1592 * Called before starting a new recompile job.
1593 */
1594static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1595{
1596 pReNative->cLabels = 0;
1597 pReNative->bmLabelTypes = 0;
1598 pReNative->cFixups = 0;
1599#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1600 pReNative->pDbgInfo->cEntries = 0;
1601#endif
1602 pReNative->pTbOrg = pTb;
1603 pReNative->cCondDepth = 0;
1604 pReNative->uCondSeqNo = 0;
1605 pReNative->uCheckIrqSeqNo = 0;
1606
1607 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1608#if IEMNATIVE_HST_GREG_COUNT < 32
1609 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1610#endif
1611 ;
1612 pReNative->Core.bmHstRegsWithGstShadow = 0;
1613 pReNative->Core.bmGstRegShadows = 0;
1614 pReNative->Core.bmVars = 0;
1615 pReNative->Core.bmStack = 0;
1616 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1617 pReNative->Core.u64ArgVars = UINT64_MAX;
1618
1619 /* Full host register reinit: */
1620 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1621 {
1622 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1623 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1624 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1625 }
1626
1627 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1628 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1629#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1630 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1631#endif
1632#ifdef IEMNATIVE_REG_FIXED_TMP0
1633 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1634#endif
1635 );
1636 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1637 {
1638 fRegs &= ~RT_BIT_32(idxReg);
1639 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1640 }
1641
1642 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1643#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1644 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1645#endif
1646#ifdef IEMNATIVE_REG_FIXED_TMP0
1647 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1648#endif
1649 return pReNative;
1650}
1651
1652
1653/**
1654 * Allocates and initializes the native recompiler state.
1655 *
1656 * This is called the first time an EMT wants to recompile something.
1657 *
1658 * @returns Pointer to the new recompiler state.
1659 * @param pVCpu The cross context virtual CPU structure of the calling
1660 * thread.
1661 * @param pTb The TB that's about to be recompiled.
1662 * @thread EMT(pVCpu)
1663 */
1664static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1665{
1666 VMCPU_ASSERT_EMT(pVCpu);
1667
1668 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1669 AssertReturn(pReNative, NULL);
1670
1671 /*
1672 * Try allocate all the buffers and stuff we need.
1673 */
1674 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1675 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1676 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1677#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1678 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1679#endif
1680 if (RT_LIKELY( pReNative->pInstrBuf
1681 && pReNative->paLabels
1682 && pReNative->paFixups)
1683#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1684 && pReNative->pDbgInfo
1685#endif
1686 )
1687 {
1688 /*
1689 * Set the buffer & array sizes on success.
1690 */
1691 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1692 pReNative->cLabelsAlloc = _8K;
1693 pReNative->cFixupsAlloc = _16K;
1694#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1695 pReNative->cDbgInfoAlloc = _16K;
1696#endif
1697
1698 /*
1699 * Done, just need to save it and reinit it.
1700 */
1701 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1702 return iemNativeReInit(pReNative, pTb);
1703 }
1704
1705 /*
1706 * Failed. Cleanup and return.
1707 */
1708 AssertFailed();
1709 RTMemFree(pReNative->pInstrBuf);
1710 RTMemFree(pReNative->paLabels);
1711 RTMemFree(pReNative->paFixups);
1712#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1713 RTMemFree(pReNative->pDbgInfo);
1714#endif
1715 RTMemFree(pReNative);
1716 return NULL;
1717}
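
/*
 * Rough usage sketch (not lifted verbatim from the recompile entry point, but
 * this is the intended per-EMT init-or-reuse pattern):
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      pReNative = pReNative ? iemNativeReInit(pReNative, pTb) : iemNativeInit(pVCpu, pTb);
 *      if (!pReNative)
 *          // out of memory - bail in whatever way the caller requires
 */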
1718
1719
1720/**
1721 * Creates a label
1722 *
1723 * If the label does not yet have a defined position,
1724 * call iemNativeLabelDefine() later to set it.
1725 *
1726 * @returns Label ID. Throws VBox status code on failure, so no need to check
1727 * the return value.
1728 * @param pReNative The native recompile state.
1729 * @param enmType The label type.
1730 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1731 * label is not yet defined (default).
1732 * @param uData Data associated with the label. Only applicable to
1733 * certain types of labels. Default is zero.
1734 */
1735DECL_HIDDEN_THROW(uint32_t)
1736iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1737 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1738{
1739 /*
1740 * Locate existing label definition.
1741 *
1742 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1743 * and uData is zero.
1744 */
1745 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1746 uint32_t const cLabels = pReNative->cLabels;
1747 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1748#ifndef VBOX_STRICT
1749 && offWhere == UINT32_MAX
1750 && uData == 0
1751#endif
1752 )
1753 {
1754 /** @todo Since this is only used for labels with uData = 0, just use a
1755 * lookup array? */
1756 for (uint32_t i = 0; i < cLabels; i++)
1757 if ( paLabels[i].enmType == enmType
1758 && paLabels[i].uData == uData)
1759 {
1760#ifdef VBOX_STRICT
1761 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1762 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1763#endif
1764 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1765 return i;
1766 }
1767 }
1768
1769 /*
1770 * Make sure we've got room for another label.
1771 */
1772 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1773 { /* likely */ }
1774 else
1775 {
1776 uint32_t cNew = pReNative->cLabelsAlloc;
1777 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1778 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1779 cNew *= 2;
1780 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1781 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1782 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1783 pReNative->paLabels = paLabels;
1784 pReNative->cLabelsAlloc = cNew;
1785 }
1786
1787 /*
1788 * Define a new label.
1789 */
1790 paLabels[cLabels].off = offWhere;
1791 paLabels[cLabels].enmType = enmType;
1792 paLabels[cLabels].uData = uData;
1793 pReNative->cLabels = cLabels + 1;
1794
1795 Assert((unsigned)enmType < 64);
1796 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1797
1798 if (offWhere != UINT32_MAX)
1799 {
1800#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1801 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1802 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1803#endif
1804 }
1805 return cLabels;
1806}
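
/*
 * Sketch of the usual forward-label pattern (the label type is only an
 * example; the real values come from the IEMNATIVELABELTYPE enum in the
 * header):
 *
 *      // Position not known yet - creates (or finds) a forward declaration.
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *      // ... emit branches to it, recording fixups via iemNativeAddFixup() ...
 *      // Once the target location has been reached:
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */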
1807
1808
1809/**
1810 * Defines the location of an existing label.
1811 *
1812 * @param pReNative The native recompile state.
1813 * @param idxLabel The label to define.
1814 * @param offWhere The position.
1815 */
1816DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1817{
1818 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1819 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1820 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1821 pLabel->off = offWhere;
1822#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1823 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1824 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1825#endif
1826}
1827
1828
1829/**
1830 * Looks up a label.
1831 *
1832 * @returns Label ID if found, UINT32_MAX if not.
1833 */
1834static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1835 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1836{
1837 Assert((unsigned)enmType < 64);
1838 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1839 {
1840 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1841 uint32_t const cLabels = pReNative->cLabels;
1842 for (uint32_t i = 0; i < cLabels; i++)
1843 if ( paLabels[i].enmType == enmType
1844 && paLabels[i].uData == uData
1845 && ( paLabels[i].off == offWhere
1846 || offWhere == UINT32_MAX
1847 || paLabels[i].off == UINT32_MAX))
1848 return i;
1849 }
1850 return UINT32_MAX;
1851}
1852
1853
1854/**
1855 * Adds a fixup.
1856 *
1857 * @throws VBox status code (int) on failure.
1858 * @param pReNative The native recompile state.
1859 * @param offWhere The instruction offset of the fixup location.
1860 * @param idxLabel The target label ID for the fixup.
1861 * @param enmType The fixup type.
1862 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1863 */
1864DECL_HIDDEN_THROW(void)
1865iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1866 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1867{
1868 Assert(idxLabel <= UINT16_MAX);
1869 Assert((unsigned)enmType <= UINT8_MAX);
1870
1871 /*
1872 * Make sure we've room.
1873 */
1874 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1875 uint32_t const cFixups = pReNative->cFixups;
1876 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1877 { /* likely */ }
1878 else
1879 {
1880 uint32_t cNew = pReNative->cFixupsAlloc;
1881 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1882 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1883 cNew *= 2;
1884 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1885 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1886 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1887 pReNative->paFixups = paFixups;
1888 pReNative->cFixupsAlloc = cNew;
1889 }
1890
1891 /*
1892 * Add the fixup.
1893 */
1894 paFixups[cFixups].off = offWhere;
1895 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1896 paFixups[cFixups].enmType = enmType;
1897 paFixups[cFixups].offAddend = offAddend;
1898 pReNative->cFixups = cFixups + 1;
1899}
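
/*
 * Illustration of how a fixup is recorded right after emitting a branch whose
 * target label is not defined yet (the fixup type name is an assumption; the
 * real ones are per-architecture and live in the header):
 *
 *      // idxLabel obtained from iemNativeLabelCreate() as shown above.
 *      uint32_t const offFixup = off;      // position of the instruction to patch
 *      // ... emit the branch instruction with a dummy displacement ...
 *      iemNativeAddFixup(pReNative, offFixup, idxLabel, kIemNativeFixupType_Rel32);
 *
 * The recorded paFixups[] entries are resolved against paLabels[] once all
 * label positions are known, at the end of recompilation.
 */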
1900
1901
1902/**
1903 * Slow code path for iemNativeInstrBufEnsure.
1904 */
1905DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1906{
1907 /* Double the buffer size till we meet the request. */
1908 uint32_t cNew = pReNative->cInstrBufAlloc;
1909 AssertReturn(cNew > 0, NULL);
1910 do
1911 cNew *= 2;
1912 while (cNew < off + cInstrReq);
1913
1914 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1915#ifdef RT_ARCH_ARM64
1916 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1917#else
1918 uint32_t const cbMaxInstrBuf = _2M;
1919#endif
1920 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1921
1922 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1923 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1924
1925 pReNative->cInstrBufAlloc = cNew;
1926 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1927}
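
/*
 * Example of the growth policy above: starting from the initial 64KB buffer
 * allocated in iemNativeInit(), the allocation doubles until off + cInstrReq
 * fits, i.e. 64KB -> 128KB -> ... up to the 1MB (ARM64) / 2MB (AMD64) ceiling;
 * a request beyond that cap longjmps with VERR_IEM_INSTR_BUF_TOO_LARGE instead
 * of growing further.
 */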
1928
1929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1930
1931/**
1932 * Grows the static debug info array used during recompilation.
1933 *
1934 * @returns Pointer to the new debug info block; throws VBox status code on
1935 * failure, so no need to check the return value.
1936 */
1937DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1938{
1939 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1940 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1941 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1942 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1943 pReNative->pDbgInfo = pDbgInfo;
1944 pReNative->cDbgInfoAlloc = cNew;
1945 return pDbgInfo;
1946}
1947
1948
1949/**
1950 * Adds a new debug info uninitialized entry, returning the pointer to it.
1951 */
1952DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1953{
1954 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1955 { /* likely */ }
1956 else
1957 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1958 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1959}
1960
1961
1962/**
1963 * Debug Info: Adds a native offset record, if necessary.
1964 */
1965static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1966{
1967 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1968
1969 /*
1970 * Search backwards to see if we've got a similar record already.
1971 */
1972 uint32_t idx = pDbgInfo->cEntries;
1973 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1974 while (idx-- > idxStop)
1975 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1976 {
1977 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1978 return;
1979 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1980 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1981 break;
1982 }
1983
1984 /*
1985 * Add it.
1986 */
1987 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1988 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1989 pEntry->NativeOffset.offNative = off;
1990}
1991
1992
1993/**
1994 * Debug Info: Record info about a label.
1995 */
1996static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
1997{
1998 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1999 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2000 pEntry->Label.uUnused = 0;
2001 pEntry->Label.enmLabel = (uint8_t)enmType;
2002 pEntry->Label.uData = uData;
2003}
2004
2005
2006/**
2007 * Debug Info: Record info about a threaded call.
2008 */
2009static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2010{
2011 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2012 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2013 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2014 pEntry->ThreadedCall.uUnused = 0;
2015 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2016}
2017
2018
2019/**
2020 * Debug Info: Record info about a new guest instruction.
2021 */
2022static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2023{
2024 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2025 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2026 pEntry->GuestInstruction.uUnused = 0;
2027 pEntry->GuestInstruction.fExec = fExec;
2028}
2029
2030
2031/**
2032 * Debug Info: Record info about guest register shadowing.
2033 */
2034static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2035 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2036{
2037 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2038 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2039 pEntry->GuestRegShadowing.uUnused = 0;
2040 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2041 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2042 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2043}
2044
2045#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2046
2047
2048/*********************************************************************************************************************************
2049* Register Allocator *
2050*********************************************************************************************************************************/
2051
2052/**
2053 * Register parameter indexes (indexed by argument number).
2054 */
2055DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2056{
2057 IEMNATIVE_CALL_ARG0_GREG,
2058 IEMNATIVE_CALL_ARG1_GREG,
2059 IEMNATIVE_CALL_ARG2_GREG,
2060 IEMNATIVE_CALL_ARG3_GREG,
2061#if defined(IEMNATIVE_CALL_ARG4_GREG)
2062 IEMNATIVE_CALL_ARG4_GREG,
2063# if defined(IEMNATIVE_CALL_ARG5_GREG)
2064 IEMNATIVE_CALL_ARG5_GREG,
2065# if defined(IEMNATIVE_CALL_ARG6_GREG)
2066 IEMNATIVE_CALL_ARG6_GREG,
2067# if defined(IEMNATIVE_CALL_ARG7_GREG)
2068 IEMNATIVE_CALL_ARG7_GREG,
2069# endif
2070# endif
2071# endif
2072#endif
2073};
2074
2075/**
2076 * Call register masks indexed by argument count.
2077 */
2078DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2079{
2080 0,
2081 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2082 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2083 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2084 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2085 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2086#if defined(IEMNATIVE_CALL_ARG4_GREG)
2087 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2088 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2089# if defined(IEMNATIVE_CALL_ARG5_GREG)
2090 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2091 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2092# if defined(IEMNATIVE_CALL_ARG6_GREG)
2093 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2094 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2095 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2096# if defined(IEMNATIVE_CALL_ARG7_GREG)
2097 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2098 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2099 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2100# endif
2101# endif
2102# endif
2103#endif
2104};
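
/*
 * Example: g_afIemNativeCallRegs[2] is
 * RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
 * so a caller can test and claim every register needed for a two argument
 * call in one operation, e.g. (as done in iemNativeRegAllocArgs below):
 *
 *      if (!(  (pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow)
 *            & g_afIemNativeCallRegs[cArgs]))
 *          // all argument registers are free and shadow nothing
 */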
2105
2106#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2107/**
2108 * BP offset of the stack argument slots.
2109 *
2110 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2111 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2112 */
2113DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2114{
2115 IEMNATIVE_FP_OFF_STACK_ARG0,
2116# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2117 IEMNATIVE_FP_OFF_STACK_ARG1,
2118# endif
2119# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2120 IEMNATIVE_FP_OFF_STACK_ARG2,
2121# endif
2122# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2123 IEMNATIVE_FP_OFF_STACK_ARG3,
2124# endif
2125};
2126AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2127#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2128
2129/**
2130 * Info about shadowed guest register values.
2131 * @see IEMNATIVEGSTREG
2132 */
2133static struct
2134{
2135 /** Offset in VMCPU. */
2136 uint32_t off;
2137 /** The field size. */
2138 uint8_t cb;
2139 /** Name (for logging). */
2140 const char *pszName;
2141} const g_aGstShadowInfo[] =
2142{
2143#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2144 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2145 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2146 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2147 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2148 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2151 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2152 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2153 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2154 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2155 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2156 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2157 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2158 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2159 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2160 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2161 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2162 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
2163 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
2164 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
2165 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
2166 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
2167 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
2168 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2169 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2170 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2171 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2172 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2173 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2174 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2175 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2176 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2177 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2178 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2179 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2180 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2181 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2182 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2183 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2184 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2185 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2186#undef CPUMCTX_OFF_AND_SIZE
2187};
2188AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2189
2190
2191/** Host CPU general purpose register names. */
2192const char * const g_apszIemNativeHstRegNames[] =
2193{
2194#ifdef RT_ARCH_AMD64
2195 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2196#elif RT_ARCH_ARM64
2197 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2198 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2199#else
2200# error "port me"
2201#endif
2202};
2203
2204
2205DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2206 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2207{
2208 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2209
2210 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2211 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2212 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2213 return (uint8_t)idxReg;
2214}
2215
2216
2217/**
2218 * Tries to locate a suitable register in the given register mask.
2219 *
2220 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2221 * failed.
2222 *
2223 * @returns Host register number on success, returns UINT8_MAX on failure.
2224 */
2225static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2226{
2227 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2228 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2229 if (fRegs)
2230 {
2231 /** @todo pick better here: */
2232 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2233
2234 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2235 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2236 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2237 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2238
2239 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2240 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2241 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2242 return idxReg;
2243 }
2244 return UINT8_MAX;
2245}
2246
2247
2248/**
2249 * Locate a register, possibly freeing one up.
2250 *
2251 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2252 * failed.
2253 *
2254 * @returns Host register number on success. Returns UINT8_MAX if no register is
2255 * found, in which case the caller is expected to deal with it and raise an
2256 * allocation-type-specific status code (if desired).
2257 *
2258 * @throws VBox status code if we run into trouble spilling a variable or
2259 * recording debug info. Does NOT throw anything if we're out of
2260 * registers, though.
2261 */
2262static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2263 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2264{
2265 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2266 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2267
2268 /*
2269 * Try a freed register that's shadowing a guest register
2270 */
2271 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2272 if (fRegs)
2273 {
2274 unsigned const idxReg = (fPreferVolatile
2275 ? ASMBitFirstSetU32(fRegs)
2276 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2277 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2278 - 1;
2279
2280 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2281 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2282 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2283 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2284
2285 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2286 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2287 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2288 return idxReg;
2289 }
2290
2291 /*
2292 * Try free up a variable that's in a register.
2293 *
2294 * We do two rounds here, first evacuating variables we don't need to be
2295 * saved on the stack, then in the second round move things to the stack.
2296 */
2297 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2298 {
2299 uint32_t fVars = pReNative->Core.bmVars;
2300 while (fVars)
2301 {
2302 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2303 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2304 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2305 && (RT_BIT_32(idxReg) & fRegMask)
2306 && ( iLoop == 0
2307 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2308 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2309 {
2310 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2311 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2312 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2313 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2314 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2315
2316 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2317 {
2318 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
2319 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2320 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff,
2321 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2322 - IEMNATIVE_FP_OFF_STACK_VARS,
2323 idxReg);
2324 }
2325
2326 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2327 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2328 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2329 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2330 return idxReg;
2331 }
2332 fVars &= ~RT_BIT_32(idxVar);
2333 }
2334 }
2335
2336 return UINT8_MAX;
2337}
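
/*
 * Eviction order illustrated: given a mask containing (a) an unallocated
 * register that merely shadows a guest value, (b) a register backing a
 * non-stack variable and (c) a register backing a stack variable, the code
 * above picks (a) first (no code emitted, only the shadowing is dropped),
 * then (b) in the first variable round (such values can be recreated later),
 * and only then (c) in the second round, which costs a store to the
 * variable's stack slot.
 */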
2338
2339
2340/**
2341 * Moves a variable to a different register or spills it onto the stack.
2342 *
2343 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2344 * kinds can easily be recreated if needed later.
2345 *
2346 * @returns The new code buffer position, UINT32_MAX on failure.
2347 * @param pReNative The native recompile state.
2348 * @param off The current code buffer position.
2349 * @param idxVar The variable index.
2350 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2351 * call-volatile registers.
2352 */
2353static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2354 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2355{
2356 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2357 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2358
2359 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2360 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2361 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2362 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2363 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2364 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2365 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2366 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2367
2368
2369 /** @todo Add statistics on this.*/
2370 /** @todo Implement basic variable liveness analysis (python) so variables
2371 * can be freed immediately once no longer used. This has the potential to
2372 * be trashing registers and stack for dead variables. */
2373
2374 /*
2375 * First try move it to a different register, as that's cheaper.
2376 */
2377 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2378 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2379 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2380 if (fRegs)
2381 {
2382 /* Avoid using shadow registers, if possible. */
2383 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2384 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2385 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2386
2387 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2388 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2389 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2390 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2391 if (fGstRegShadows)
2392 {
2393 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2394 while (fGstRegShadows)
2395 {
2396 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2397 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2398
2399 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2400 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2401 }
2402 }
2403
2404 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2405 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2406 }
2407 /*
2408 * Otherwise we must spill the register onto the stack.
2409 */
2410 else
2411 {
2412 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2413 off = iemNativeEmitStoreGprByBp(pReNative, off,
2414 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2415 - IEMNATIVE_FP_OFF_STACK_VARS,
2416 idxRegOld);
2417
2418 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2419 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2420 }
2421
2422 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2423 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2424 return off;
2425}
2426
2427
2428/**
2429 * Allocates a temporary host general purpose register.
2430 *
2431 * This may emit code to save register content onto the stack in order to free
2432 * up a register.
2433 *
2434 * @returns The host register number; throws VBox status code on failure,
2435 * so no need to check the return value.
2436 * @param pReNative The native recompile state.
2437 * @param poff Pointer to the variable with the code buffer position.
2438 * This will be updated if we need to move a variable from
2439 * register to stack in order to satisfy the request.
2440 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2441 * registers (@c true, default) or the other way around
2442 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2443 */
2444DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2445{
2446 /*
2447 * Try find a completely unused register, preferably a call-volatile one.
2448 */
2449 uint8_t idxReg;
2450 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2451 & ~pReNative->Core.bmHstRegsWithGstShadow
2452 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2453 if (fRegs)
2454 {
2455 if (fPreferVolatile)
2456 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2457 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2458 else
2459 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2460 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2461 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2462 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2463 }
2464 else
2465 {
2466 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2467 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2468 }
2469 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2470}
2471
2472
2473/**
2474 * Allocates a temporary register for loading an immediate value into.
2475 *
2476 * This will emit code to load the immediate, unless there happens to be an
2477 * unused register with the value already loaded.
2478 *
2479 * The caller will not modify the returned register, it must be considered
2480 * read-only. Free using iemNativeRegFreeTmpImm.
2481 *
2482 * @returns The host register number; throws VBox status code on failure, so no
2483 * need to check the return value.
2484 * @param pReNative The native recompile state.
2485 * @param poff Pointer to the variable with the code buffer position.
2486 * @param uImm The immediate value that the register must hold upon
2487 * return.
2488 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2489 * registers (@c true, default) or the other way around
2490 * (@c false).
2491 *
2492 * @note Reusing immediate values has not been implemented yet.
2493 */
2494DECL_HIDDEN_THROW(uint8_t)
2495iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2496{
2497 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2498 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2499 return idxReg;
2500}
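
/*
 * Typical usage of the two temporary register allocators above (the emitter
 * in the middle is just a placeholder):
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000));
 *      // ... emit code that writes idxRegTmp and only reads idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */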
2501
2502
2503/**
2504 * Marks host register @a idxHstReg as containing a shadow copy of guest
2505 * register @a enmGstReg.
2506 *
2507 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2508 * host register before calling.
2509 */
2510DECL_FORCE_INLINE(void)
2511iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2512{
2513 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2514
2515 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2516 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2517 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2518 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2519#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2520 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2521 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2522#else
2523 RT_NOREF(off);
2524#endif
2525}
2526
2527
2528/**
2529 * Clear any guest register shadow claims from @a idxHstReg.
2530 *
2531 * The register does not need to be shadowing any guest registers.
2532 */
2533DECL_FORCE_INLINE(void)
2534iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2535{
2536 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2537 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2538 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2539 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2540
2541#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2542 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2543 if (fGstRegs)
2544 {
2545 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2546 while (fGstRegs)
2547 {
2548 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2549 fGstRegs &= ~RT_BIT_64(iGstReg);
2550 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2551 }
2552 }
2553#else
2554 RT_NOREF(off);
2555#endif
2556
2557 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2558 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2559 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2560}
2561
2562
2563/**
2564 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2565 * to @a idxRegTo.
2566 */
2567DECL_FORCE_INLINE(void)
2568iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2569 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2570{
2571 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2572 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2573 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2574 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2575 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2576
2577 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2578 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2579 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2580#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2581 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2582 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2583#else
2584 RT_NOREF(off);
2585#endif
2586}
2587
2588
2589/**
2590 * Allocates a temporary host general purpose register for keeping a guest
2591 * register value.
2592 *
2593 * If a host register already holds the guest register value it is reused;
2594 * otherwise code will be emitted to load it. Code may also be emitted if we
2595 * have to free up a register to satisfy the request.
2596 *
2597 * @returns The host register number; throws VBox status code on failure, so no
2598 * need to check the return value.
2599 * @param pReNative The native recompile state.
2600 * @param poff Pointer to the variable with the code buffer
2601 * position. This will be updated if we need to move a
2602 * variable from register to stack in order to satisfy
2603 * the request.
2604 * @param enmGstReg The guest register that is to be updated.
2605 * @param enmIntendedUse How the caller will be using the host register.
2606 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2607 */
2608DECL_HIDDEN_THROW(uint8_t)
2609iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2610 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2611{
2612 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2613#ifdef LOG_ENABLED
2614 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2615#endif
2616
2617 /*
2618 * First check if the guest register value is already in a host register.
2619 */
2620 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2621 {
2622 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2623 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2624 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2625 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2626
2627 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2628 {
2629 /*
2630 * If the register will trash the guest shadow copy, try find a
2631 * completely unused register we can use instead. If that fails,
2632 * we need to disassociate the host reg from the guest reg.
2633 */
2634 /** @todo would be nice to know if preserving the register is in any way helpful. */
2635 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2636 && ( ~pReNative->Core.bmHstRegs
2637 & ~pReNative->Core.bmHstRegsWithGstShadow
2638 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2639 {
2640 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2641
2642 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2643
2644 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2645 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2646 g_apszIemNativeHstRegNames[idxRegNew]));
2647 idxReg = idxRegNew;
2648 }
2649 else
2650 {
2651 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2652 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2653 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2654 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2655 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2656 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2657 else
2658 {
2659 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2660 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2661 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2662 }
2663 }
2664 }
2665 else
2666 {
2667 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2668 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2669
2670 /*
2671 * Allocate a new register, copy the value and, if updating, the
2672 * guest shadow copy assignment to the new register.
2673 */
2674 /** @todo share register for readonly access. */
2675 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2676
2677 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2678
2679 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2680 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2681 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2682 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2683 else
2684 {
2685 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2686 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2687 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2688 g_apszIemNativeHstRegNames[idxRegNew]));
2689 }
2690 idxReg = idxRegNew;
2691 }
2692
2693#ifdef VBOX_STRICT
2694 /* Strict builds: Check that the value is correct. */
2695 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2696#endif
2697
2698 return idxReg;
2699 }
2700
2701 /*
2702 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
2703 */
2704 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2705
2706 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2707
2708 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2709 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2710 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2711 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2712
2713 return idxRegNew;
2714}
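
/*
 * Sketch: grabbing a host register for guest RAX with update intent (enum
 * names per the shadow info table above).  Afterwards the host register both
 * holds and shadows the guest value; flushing it back to CPUMCTX happens
 * elsewhere.
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                             (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                             kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying idxReg ...
 *      iemNativeRegFreeTmp(pReNative, idxReg);
 */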
2715
2716
2717/**
2718 * Allocates a temporary host general purpose register that already holds the
2719 * given guest register value.
2720 *
2721 * The use case for this function is places where the shadowing state cannot be
2722 * modified due to branching and such. This will fail if we don't have a
2723 * current shadow copy handy or if it's incompatible. The only code that will
2724 * be emitted here is value checking code in strict builds.
2725 *
2726 * The intended use can only be readonly!
2727 *
2728 * @returns The host register number, UINT8_MAX if not present.
2729 * @param pReNative The native recompile state.
2730 * @param poff Pointer to the instruction buffer offset.
2731 * Will be updated in strict builds if a register is
2732 * found.
2733 * @param enmGstReg The guest register that is to be accessed.
2734 * @note In strict builds, this may throw instruction buffer growth failures.
2735 * Non-strict builds will not throw anything.
2736 * @sa iemNativeRegAllocTmpForGuestReg
2737 */
2738DECL_HIDDEN_THROW(uint8_t)
2739iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2740{
2741 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2742
2743 /*
2744 * First check if the guest register value is already in a host register.
2745 */
2746 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2747 {
2748 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2749 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2750 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2751 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2752
2753 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2754 {
2755 /*
2756 * We only do readonly use here, so easy compared to the other
2757 * variant of this code.
2758 */
2759 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2760 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2761 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2762 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2763 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2764
2765#ifdef VBOX_STRICT
2766 /* Strict builds: Check that the value is correct. */
2767 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2768#else
2769 RT_NOREF(poff);
2770#endif
2771 return idxReg;
2772 }
2773 }
2774
2775 return UINT8_MAX;
2776}
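
/*
 * Sketch: opportunistically reusing an existing shadow copy, e.g. of the
 * guest PC, without disturbing the shadowing state:
 *
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxPcReg != UINT8_MAX)
 *      {
 *          // ... use the cached value; remember to iemNativeRegFreeTmp() it afterwards ...
 *      }
 *      else
 *      {
 *          // ... fall back to loading rip from CPUMCTX or another strategy ...
 *      }
 */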
2777
2778
2779DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2780
2781
2782/**
2783 * Allocates argument registers for a function call.
2784 *
2785 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2786 * need to check the return value.
2787 * @param pReNative The native recompile state.
2788 * @param off The current code buffer offset.
2789 * @param cArgs The number of arguments the function call takes.
2790 */
2791DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2792{
2793 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2794 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2795 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2796 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2797
2798 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2799 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2800 else if (cArgs == 0)
2801 return true;
2802
2803 /*
2804 * Do we get lucky and all registers are free and not shadowing anything?
2805 */
2806 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2807 for (uint32_t i = 0; i < cArgs; i++)
2808 {
2809 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2810 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2811 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2812 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2813 }
2814 /*
2815 * Okay, not lucky so we have to free up the registers.
2816 */
2817 else
2818 for (uint32_t i = 0; i < cArgs; i++)
2819 {
2820 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2821 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2822 {
2823 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2824 {
2825 case kIemNativeWhat_Var:
2826 {
2827 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2828 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2829 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2830 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2831 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2832
2833 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2834 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2835 else
2836 {
2837 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2838 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2839 }
2840 break;
2841 }
2842
2843 case kIemNativeWhat_Tmp:
2844 case kIemNativeWhat_Arg:
2845 case kIemNativeWhat_rc:
2846 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2847 default:
2848 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2849 }
2850
2851 }
2852 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2853 {
2854 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2855 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2856 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2857 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2858 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2859 }
2860 else
2861 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2862 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2863 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2864 }
2865 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2866 return true;
2867}
2868
2869
2870DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2871
2872
2873#if 0
2874/**
2875 * Frees a register assignment of any type.
2876 *
2877 * @param pReNative The native recompile state.
2878 * @param idxHstReg The register to free.
2879 *
2880 * @note Does not update variables.
2881 */
2882DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2883{
2884 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2885 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2886 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2887 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2888 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2889 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2890 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2891 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2892 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2893 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2894 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2895 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2896 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2897 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2898
2899 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2900 /* no flushing, right:
2901 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2902 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2903 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2904 */
2905}
2906#endif
2907
2908
2909/**
2910 * Frees a temporary register.
2911 *
2912 * Any shadow copies of guest registers assigned to the host register will not
2913 * be flushed by this operation.
2914 */
2915DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2916{
2917 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2918 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2919 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2920 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2921 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2922}
2923
2924
2925/**
2926 * Frees a temporary immediate register.
2927 *
2928 * It is assumed that the caller has not modified the register, so it still holds
2929 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2930 */
2931DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2932{
2933 iemNativeRegFreeTmp(pReNative, idxHstReg);
2934}
2935
2936
2937/**
2938 * Called right before emitting a call instruction to move anything important
2939 * out of call-volatile registers, free and flush the call-volatile registers,
2940 * optionally freeing argument variables.
2941 *
2942 * @returns New code buffer offset; throws VBox status code on failure.
2943 * @param pReNative The native recompile state.
2944 * @param off The code buffer offset.
2945 * @param cArgs The number of arguments the function call takes.
2946 * It is presumed that the host register part of these have
2947 * been allocated as such already and won't need moving,
2948 * just freeing.
2949 */
2950DECL_HIDDEN_THROW(uint32_t)
2951iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2952{
2953 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
2954
2955 /*
2956 * Move anything important out of volatile registers.
2957 */
2958 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2959 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2960 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2961#ifdef IEMNATIVE_REG_FIXED_TMP0
2962 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2963#endif
2964 & ~g_afIemNativeCallRegs[cArgs];
2965
2966 fRegsToMove &= pReNative->Core.bmHstRegs;
2967 if (!fRegsToMove)
2968 { /* likely */ }
2969 else
2970 while (fRegsToMove != 0)
2971 {
2972 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2973 fRegsToMove &= ~RT_BIT_32(idxReg);
2974
2975 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2976 {
2977 case kIemNativeWhat_Var:
2978 {
2979 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2980 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2981 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2982 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2983 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2984 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2985 else
2986 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2987 continue;
2988 }
2989
2990 case kIemNativeWhat_Arg:
2991 AssertMsgFailed(("What?!?: %u\n", idxReg));
2992 continue;
2993
2994 case kIemNativeWhat_rc:
2995 case kIemNativeWhat_Tmp:
2996 AssertMsgFailed(("Missing free: %u\n", idxReg));
2997 continue;
2998
2999 case kIemNativeWhat_FixedTmp:
3000 case kIemNativeWhat_pVCpuFixed:
3001 case kIemNativeWhat_pCtxFixed:
3002 case kIemNativeWhat_FixedReserved:
3003 case kIemNativeWhat_Invalid:
3004 case kIemNativeWhat_End:
3005 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3006 }
3007 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3008 }
3009
3010 /*
3011 * Do the actual freeing.
3012 */
3013 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3014
3015 /* If there are guest register shadows in any call-volatile register, we
3016       have to clear the corresponding guest register masks for each register. */
3017 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3018 if (fHstRegsWithGstShadow)
3019 {
3020 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3021 do
3022 {
3023 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3024            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3025
3026 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3027 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3028 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3029 } while (fHstRegsWithGstShadow != 0);
3030 }
3031
3032 return off;
3033}
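
/*
 * Usage sketch (illustrative only; assumes a helper call taking up to four
 * register arguments): the typical sequence before emitting a helper call is
 * to drop all guest shadows and then free/flush the call-volatile registers,
 * exactly as the CImpl and threaded-call emitters further down do:
 *
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
 *      ... load the argument registers and emit the call via iemNativeEmitCallImm() ...
 */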
3034
3035
3036/**
3037 * Flushes a set of guest register shadow copies.
3038 *
3039 * This is usually done after calling a threaded function or a C-implementation
3040 * of an instruction.
3041 *
3042 * @param pReNative The native recompile state.
3043 * @param fGstRegs Set of guest registers to flush.
3044 */
3045DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3046{
3047 /*
3048 * Reduce the mask by what's currently shadowed
3049 */
3050 fGstRegs &= pReNative->Core.bmGstRegShadows;
3051 if (fGstRegs)
3052 {
3053 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3054 if (pReNative->Core.bmGstRegShadows)
3055 {
3056 /*
3057 * Partial.
3058 */
3059 do
3060 {
3061 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3062 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3063 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3064 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3065 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3066
3067 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3068 fGstRegs &= ~fInThisHstReg;
3069 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= fInThisHstReg;
3070 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3071 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3072 } while (fGstRegs != 0);
3073 }
3074 else
3075 {
3076 /*
3077 * Clear all.
3078 */
3079 do
3080 {
3081 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3082 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3083 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3084 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3085 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3086
3087 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3088 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3089 } while (fGstRegs != 0);
3090 pReNative->Core.bmHstRegsWithGstShadow = 0;
3091 }
3092 }
3093}
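
/*
 * Invariant sketch (illustrative only, not part of the original source): the
 * shadow bookkeeping keeps three views in sync - bmGstRegShadows, the per
 * guest register host index in aidxGstRegShadows, and the per host register
 * fGstRegShadows mask together with bmHstRegsWithGstShadow.  A strict-build
 * style checker for that could look like this (the function name is made up):
 */
#if 0 /* example only */
static void iemNativeExampleCheckShadowInvariants(PIEMRECOMPILERSTATE pReNative)
{
    uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
    while (fGstRegs != 0)
    {
        unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
        fGstRegs &= ~RT_BIT_64(idxGstReg);
        uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
        Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
        Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    }
}
#endif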
3094
3095
3096/**
3097 * Flushes any delayed guest register writes.
3098 *
3099 * This must be called prior to calling CImpl functions and any helpers that use
3100 * the guest state (like raising exceptions) and such.
3101 *
3102 * This optimization has not yet been implemented. The first target would be
3103 * RIP updates, since these are the most common ones.
3104 */
3105DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3106{
3107 RT_NOREF(pReNative, off);
3108 return off;
3109}
3110
3111
3112/*********************************************************************************************************************************
3113* Code Emitters (larger snippets) *
3114*********************************************************************************************************************************/
3115
3116/**
3117 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3118 * extending to 64-bit width.
3119 *
3120 * @returns New code buffer offset on success; throws VBox status code on failure.
3121 * @param pReNative The native recompile state.
3122 * @param off The current code buffer position.
3123 * @param idxHstReg The host register to load the guest register value into.
3124 * @param enmGstReg The guest register to load.
3125 *
3126 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3127 * that is something the caller needs to do if applicable.
3128 */
3129DECL_HIDDEN_THROW(uint32_t)
3130iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3131{
3132 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3133 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3134
3135 switch (g_aGstShadowInfo[enmGstReg].cb)
3136 {
3137 case sizeof(uint64_t):
3138 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3139 case sizeof(uint32_t):
3140 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3141 case sizeof(uint16_t):
3142 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3143#if 0 /* not present in the table. */
3144 case sizeof(uint8_t):
3145 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3146#endif
3147 default:
3148 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3149 }
3150}
3151
3152
3153#ifdef VBOX_STRICT
3154/**
3155 * Emits code that checks that the content of register @a idxReg is the same
3156 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
3157 * instruction if that's not the case.
3158 *
3159 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3160 * Trashes EFLAGS on AMD64.
3161 */
3162static uint32_t
3163iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3164{
3165# ifdef RT_ARCH_AMD64
3166 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3167
3168 /* cmp reg, [mem] */
3169 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3170 {
3171 if (idxReg >= 8)
3172 pbCodeBuf[off++] = X86_OP_REX_R;
3173 pbCodeBuf[off++] = 0x38;
3174 }
3175 else
3176 {
3177 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3178 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3179 else
3180 {
3181 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3182 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3183 else
3184 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3185 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3186 if (idxReg >= 8)
3187 pbCodeBuf[off++] = X86_OP_REX_R;
3188 }
3189 pbCodeBuf[off++] = 0x39;
3190 }
3191 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3192
3193 /* je/jz +1 */
3194 pbCodeBuf[off++] = 0x74;
3195 pbCodeBuf[off++] = 0x01;
3196
3197 /* int3 */
3198 pbCodeBuf[off++] = 0xcc;
3199
3200 /* For values smaller than the register size, we must check that the rest
3201 of the register is all zeros. */
3202 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3203 {
3204 /* test reg64, imm32 */
3205 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3206 pbCodeBuf[off++] = 0xf7;
3207 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3208 pbCodeBuf[off++] = 0;
3209 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3210 pbCodeBuf[off++] = 0xff;
3211 pbCodeBuf[off++] = 0xff;
3212
3213 /* je/jz +1 */
3214 pbCodeBuf[off++] = 0x74;
3215 pbCodeBuf[off++] = 0x01;
3216
3217 /* int3 */
3218 pbCodeBuf[off++] = 0xcc;
3219 }
3220 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3221 {
3222 /* rol reg64, 32 */
3223 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3224 pbCodeBuf[off++] = 0xc1;
3225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3226 pbCodeBuf[off++] = 32;
3227
3228 /* test reg32, ffffffffh */
3229 if (idxReg >= 8)
3230 pbCodeBuf[off++] = X86_OP_REX_B;
3231 pbCodeBuf[off++] = 0xf7;
3232 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3233 pbCodeBuf[off++] = 0xff;
3234 pbCodeBuf[off++] = 0xff;
3235 pbCodeBuf[off++] = 0xff;
3236 pbCodeBuf[off++] = 0xff;
3237
3238 /* je/jz +1 */
3239 pbCodeBuf[off++] = 0x74;
3240 pbCodeBuf[off++] = 0x01;
3241
3242 /* int3 */
3243 pbCodeBuf[off++] = 0xcc;
3244
3245 /* rol reg64, 32 */
3246 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3247 pbCodeBuf[off++] = 0xc1;
3248 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3249 pbCodeBuf[off++] = 32;
3250 }
3251
3252# elif defined(RT_ARCH_ARM64)
3253 /* mov TMP0, [gstreg] */
3254 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3255
3256 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3257 /* sub tmp0, tmp0, idxReg */
3258 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3259 /* cbz tmp0, +1 */
3260 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3261 /* brk #0x1000+enmGstReg */
3262 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3263
3264# else
3265# error "Port me!"
3266# endif
3267 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3268 return off;
3269}
3270#endif /* VBOX_STRICT */
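
/*
 * For reference (illustrative summary of iemNativeEmitGuestRegValueCheck above):
 * for a 64-bit guest register the emitted AMD64 sequence is roughly
 *      cmp     reg, [pVCpu + g_aGstShadowInfo[enmGstReg].off]
 *      je      +1
 *      int3
 * with an additional test (and rol/test pair for 32-bit values) verifying that
 * the upper bits of the host register are zero for sub-64-bit values.
 */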
3271
3272
3273
3274/**
3275 * Emits code for checking the return code of a call and rcPassUp, returning
3276 * from the code if either is non-zero.
3277 */
3278DECL_HIDDEN_THROW(uint32_t)
3279iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3280{
3281#ifdef RT_ARCH_AMD64
3282 /*
3283 * AMD64: eax = call status code.
3284 */
3285
3286 /* edx = rcPassUp */
3287 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3288# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3289 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3290# endif
3291
3292 /* edx = eax | rcPassUp */
3293 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3294 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3295 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3297
3298 /* Jump to non-zero status return path. */
3299 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3300
3301 /* done. */
3302
3303#elif RT_ARCH_ARM64
3304 /*
3305 * ARM64: w0 = call status code.
3306 */
3307 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3308 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3309
3310 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3311
3312 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3313
3314 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3315 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3316 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3317
3318#else
3319# error "port me"
3320#endif
3321 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3322 return off;
3323}
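
/*
 * Conceptually (illustrative only), the code emitted above performs:
 *      if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;
 * i.e. the immediate call status and any deferred rcPassUp are folded into a
 * single branch on the OR of the two values.
 */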
3324
3325
3326/**
3327 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3328 * raising a \#GP(0) if it isn't.
3329 *
3330 * @returns New code buffer offset; throws VBox status code on failure.
3331 * @param pReNative The native recompile state.
3332 * @param off The code buffer offset.
3333 * @param idxAddrReg The host register with the address to check.
3334 * @param idxInstr The current instruction.
3335 */
3336DECL_HIDDEN_THROW(uint32_t)
3337iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3338{
3339 RT_NOREF(idxInstr);
3340
3341 /*
3342 * Make sure we don't have any outstanding guest register writes as we may
3343     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3344 */
3345 off = iemNativeRegFlushPendingWrites(pReNative, off);
3346
3347#ifdef RT_ARCH_AMD64
3348 /*
3349 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3350 * return raisexcpt();
3351     * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
3352 */
3353 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3354
3355 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3356 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3357 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3358 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3359
3360# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3361 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3362# else
3363 uint32_t const offFixup = off;
3364 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3365 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3366 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3367 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3368# endif
3369
3370 iemNativeRegFreeTmp(pReNative, iTmpReg);
3371
3372#elif defined(RT_ARCH_ARM64)
3373 /*
3374 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3375 * return raisexcpt();
3376 * ----
3377 * mov x1, 0x800000000000
3378 * add x1, x0, x1
3379 * cmp xzr, x1, lsr 48
3380 * and either:
3381 * b.ne .Lraisexcpt
3382 * or:
3383 * b.eq .Lnoexcept
3384 * movz x1, #instruction-number
3385 * b .Lraisexcpt
3386 * .Lnoexcept:
3387 */
3388 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3389
3390 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3391 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3392     off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3393
3394# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3395 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3396# else
3397 uint32_t const offFixup = off;
3398 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3399 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3400 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3401 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3402# endif
3403
3404 iemNativeRegFreeTmp(pReNative, iTmpReg);
3405
3406#else
3407# error "Port me"
3408#endif
3409 return off;
3410}
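
/*
 * Illustrative sketch (not part of the recompiler): plain C equivalents of the
 * two canonical-address checks emitted above, useful for seeing why the bias
 * trick works.  The function names are made up for the example.
 */
#if 0 /* example only */
/* AMD64 variant: avoids a 64-bit immediate at the cost of an extra instruction. */
static bool iemNativeExampleIsCanonicalAmd64Style(uint64_t uAddr)
{
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}

/* ARM64 variant: add a single 48-bit bias and check that the top 16 bits are zero. */
static bool iemNativeExampleIsCanonicalArm64Style(uint64_t uAddr)
{
    return ((uAddr + UINT64_C(0x800000000000)) >> 48) == 0;
}
#endif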
3411
3412
3413/**
3414 * Emits code to check if the content of @a idxAddrReg is within the limit of
3415 * idxSegReg, raising a \#GP(0) if it isn't.
3416 *
3417 * @returns New code buffer offset; throws VBox status code on error.
3418 * @param pReNative The native recompile state.
3419 * @param off The code buffer offset.
3420 * @param idxAddrReg The host register (32-bit) with the address to
3421 * check.
3422 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3423 * against.
3424 * @param idxInstr The current instruction.
3425 */
3426DECL_HIDDEN_THROW(uint32_t)
3427iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3428 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3429{
3430 /*
3431 * Make sure we don't have any outstanding guest register writes as we may
3432     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3433 */
3434 off = iemNativeRegFlushPendingWrites(pReNative, off);
3435
3436 /** @todo implement expand down/whatnot checking */
3437 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3438
3439 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3440 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3441 kIemNativeGstRegUse_ForUpdate);
3442
3443 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3444
3445#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3446 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3447 RT_NOREF(idxInstr);
3448#else
3449 uint32_t const offFixup = off;
3450 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3451 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3452 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3453 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3454#endif
3455
3456 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3457 return off;
3458}
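
/*
 * Conceptually (illustrative only), the emitted check is:
 *      if ((uint32_t)uAddr > <limit of idxSegReg>)
 *          raise #GP(0) and exit the TB;
 * currently restricted to CS until the expand-down / attribute handling noted
 * in the @todo above is implemented.
 */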
3459
3460
3461/**
3462 * Emits a call to a CImpl function or something similar.
3463 */
3464static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3465 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3466 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3467{
3468 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3469 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3470
3471 /*
3472 * Load the parameters.
3473 */
3474#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3475     /* Special-case the hidden VBOXSTRICTRC pointer. */
3476 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3477 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3478 if (cAddParams > 0)
3479 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3480 if (cAddParams > 1)
3481 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3482 if (cAddParams > 2)
3483 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3484 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3485
3486#else
3487 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3488 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3489 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3490 if (cAddParams > 0)
3491 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3492 if (cAddParams > 1)
3493 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3494 if (cAddParams > 2)
3495# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3496 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3497# else
3498 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3499# endif
3500#endif
3501
3502 /*
3503 * Make the call.
3504 */
3505 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3506
3507#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3508 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3509#endif
3510
3511 /*
3512 * Check the status code.
3513 */
3514 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3515}
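
/*
 * Conceptually (illustrative only), the code emitted above amounts to:
 *      rcStrict = pfnCImpl(pVCpu, cbInstr [, uParam0 [, uParam1 [, uParam2]]]);
 *      if ((rcStrict | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;
 * with the Windows/VBOXSTRICTRC build passing a hidden return buffer as the
 * first argument and spilling excess parameters to the stack.
 */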
3516
3517
3518/**
3519 * Emits a call to a threaded worker function.
3520 */
3521static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3522{
3523 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3524 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3525 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3526
3527#ifdef RT_ARCH_AMD64
3528 /* Load the parameters and emit the call. */
3529# ifdef RT_OS_WINDOWS
3530# ifndef VBOXSTRICTRC_STRICT_ENABLED
3531 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3532 if (cParams > 0)
3533 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3534 if (cParams > 1)
3535 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3536 if (cParams > 2)
3537 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3538# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3539 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3540 if (cParams > 0)
3541 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3542 if (cParams > 1)
3543 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3544 if (cParams > 2)
3545 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3546 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3547 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3548# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3549# else
3550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3551 if (cParams > 0)
3552 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3553 if (cParams > 1)
3554 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3555 if (cParams > 2)
3556 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3557# endif
3558
3559 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3560
3561# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3562 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3563# endif
3564
3565#elif RT_ARCH_ARM64
3566 /*
3567 * ARM64:
3568 */
3569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3570 if (cParams > 0)
3571 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3572 if (cParams > 1)
3573 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3574 if (cParams > 2)
3575 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3576
3577 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3578
3579#else
3580# error "port me"
3581#endif
3582
3583 /*
3584 * Check the status code.
3585 */
3586 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3587
3588 return off;
3589}
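
/*
 * Conceptually (illustrative only), the code emitted above amounts to:
 *      rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
 *                     pCallEntry->auParams[0], pCallEntry->auParams[1], pCallEntry->auParams[2]);
 *      if ((rcStrict | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;
 * with trailing parameters only loaded when cParams says they are used.
 */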
3590
3591
3592/**
3593 * Emits the code at the RaiseGP0 label.
3594 */
3595static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3596{
3597 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3598 if (idxLabel != UINT32_MAX)
3599 {
3600 iemNativeLabelDefine(pReNative, idxLabel, off);
3601
3602 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3603 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3604#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3605 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3606#endif
3607 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3608
3609 /* jump back to the return sequence. */
3610 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3611 }
3612 return off;
3613}
3614
3615
3616/**
3617 * Emits the code at the ReturnWithFlags label (returns
3618 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3619 */
3620static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3621{
3622 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3623 if (idxLabel != UINT32_MAX)
3624 {
3625 iemNativeLabelDefine(pReNative, idxLabel, off);
3626
3627 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3628
3629 /* jump back to the return sequence. */
3630 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3631 }
3632 return off;
3633}
3634
3635
3636/**
3637 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3638 */
3639static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3640{
3641 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3642 if (idxLabel != UINT32_MAX)
3643 {
3644 iemNativeLabelDefine(pReNative, idxLabel, off);
3645
3646 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3647
3648 /* jump back to the return sequence. */
3649 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3650 }
3651 return off;
3652}
3653
3654
3655/**
3656 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3657 */
3658static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3659{
3660 /*
3661 * Generate the rc + rcPassUp fiddling code if needed.
3662 */
3663 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3664 if (idxLabel != UINT32_MAX)
3665 {
3666 iemNativeLabelDefine(pReNative, idxLabel, off);
3667
3668 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3669#ifdef RT_ARCH_AMD64
3670# ifdef RT_OS_WINDOWS
3671# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3672 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3673# endif
3674 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3675 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3676# else
3677 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3678 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3679# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3680 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3681# endif
3682# endif
3683# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3684 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3685# endif
3686
3687#else
3688 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3689 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3690 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3691#endif
3692
3693 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3694 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3695 }
3696 return off;
3697}
3698
3699
3700/**
3701 * Emits a standard epilog.
3702 */
3703static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3704{
3705 *pidxReturnLabel = UINT32_MAX;
3706
3707 /*
3708 * Successful return, so clear the return register (eax, w0).
3709 */
3710     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3711
3712 /*
3713 * Define label for common return point.
3714 */
3715 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3716 *pidxReturnLabel = idxReturn;
3717
3718 /*
3719 * Restore registers and return.
3720 */
3721#ifdef RT_ARCH_AMD64
3722 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3723
3724     /* Reposition rsp at the r15 restore point. */
3725 pbCodeBuf[off++] = X86_OP_REX_W;
3726 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3727 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3728 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3729
3730 /* Pop non-volatile registers and return */
3731 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3732 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3733 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3734 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3735 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3736 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3737 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3738 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3739# ifdef RT_OS_WINDOWS
3740 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3741 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3742# endif
3743 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3744 pbCodeBuf[off++] = 0xc9; /* leave */
3745 pbCodeBuf[off++] = 0xc3; /* ret */
3746 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3747
3748#elif RT_ARCH_ARM64
3749 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3750
3751     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3752 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3753 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3754 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3755 IEMNATIVE_FRAME_VAR_SIZE / 8);
3756 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3757 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3758 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3759 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3760 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3761 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3762 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3763 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3764 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3765 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3766 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3767 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3768
3769 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3770 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3771 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3772 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3773
3774 /* retab / ret */
3775# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3776 if (1)
3777 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3778 else
3779# endif
3780 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3781
3782#else
3783# error "port me"
3784#endif
3785 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3786
3787 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3788}
3789
3790
3791/**
3792 * Emits a standard prolog.
3793 */
3794static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3795{
3796#ifdef RT_ARCH_AMD64
3797 /*
3798 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3799 * reserving 64 bytes for stack variables plus 4 non-register argument
3800     * slots. Fixed register assignment: xBX = pVCpu.
3801 *
3802 * Since we always do the same register spilling, we can use the same
3803 * unwind description for all the code.
3804 */
3805 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3806 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3807 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3808 pbCodeBuf[off++] = 0x8b;
3809 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3810 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3811 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3812# ifdef RT_OS_WINDOWS
3813 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3814 pbCodeBuf[off++] = 0x8b;
3815 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3816 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3817 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3818# else
3819 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3820 pbCodeBuf[off++] = 0x8b;
3821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3822# endif
3823 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3824 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3825 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3826 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3827 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3828 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3829 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3830 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3831
3832 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3833 X86_GREG_xSP,
3834 IEMNATIVE_FRAME_ALIGN_SIZE
3835 + IEMNATIVE_FRAME_VAR_SIZE
3836 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3837 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3838 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3839 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3840 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3841
3842#elif RT_ARCH_ARM64
3843 /*
3844 * We set up a stack frame exactly like on x86, only we have to push the
3845     * return address ourselves here. We save all non-volatile registers.
3846 */
3847 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3848
3849 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
3850                       * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
3851                       * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
3852                       * in any way conditional, so we just emit this instruction now and hope for the best... */
3853 /* pacibsp */
3854 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3855# endif
3856
3857 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3858 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3859 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3860 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3861 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3862 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3863 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3864 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3865 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3866 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3867 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3868 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3869 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3870 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3871 /* Save the BP and LR (ret address) registers at the top of the frame. */
3872 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3873 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3874 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3875 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3877 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3878
3879 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3880 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3881
3882 /* mov r28, r0 */
3883 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3884 /* mov r27, r1 */
3885 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3886
3887#else
3888# error "port me"
3889#endif
3890 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3891 return off;
3892}
3893
3894
3895
3896
3897/*********************************************************************************************************************************
3898* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
3899*********************************************************************************************************************************/
3900
3901#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3902 { \
3903 pReNative->fMc = (a_fMcFlags); \
3904 pReNative->fCImpl = (a_fCImplFlags); \
3905 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
3906
3907/** We have to get to the end in recompilation mode, as otherwise we won't
3908 * generate code for all the IEM_MC_IF_XXX branches. */
3909#define IEM_MC_END() \
3910 } return off
3911
3912
3913
3914/*********************************************************************************************************************************
3915* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
3916*********************************************************************************************************************************/
3917
3918#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3919 pReNative->fMc = 0; \
3920 pReNative->fCImpl = (a_fFlags); \
3921 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3922
3923
3924#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3925 pReNative->fMc = 0; \
3926 pReNative->fCImpl = (a_fFlags); \
3927 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3928
3929DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3930 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3931{
3932 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3933}
3934
3935
3936#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3937 pReNative->fMc = 0; \
3938 pReNative->fCImpl = (a_fFlags); \
3939 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3940
3941DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3942 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3943{
3944 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3945}
3946
3947
3948#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3949 pReNative->fMc = 0; \
3950 pReNative->fCImpl = (a_fFlags); \
3951 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3952
3953DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3954 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
3955 uint64_t uArg2)
3956{
3957 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3958}
3959
3960
3961
3962/*********************************************************************************************************************************
3963* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
3964*********************************************************************************************************************************/
3965
3966/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
3967 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
3968DECL_INLINE_THROW(uint32_t)
3969iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3970{
3971 /*
3972     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
3973     * return with a special status code and make the execution loop deal with
3974 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
3975 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
3976 * could continue w/o interruption, it probably will drop into the
3977     * debugger, so it's not worth the effort of trying to service it here and we
3978 * just lump it in with the handling of the others.
3979 *
3980 * To simplify the code and the register state management even more (wrt
3981     * immediate in the AND operation), we always update the flags and skip the
3982     * extra check and its associated conditional jump.
3983 */
3984 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
3985 <= UINT32_MAX);
3986 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3987 kIemNativeGstRegUse_ForUpdate);
3988 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
3989 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
3990 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
3991 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
3992 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
3993
3994 /* Free but don't flush the EFLAGS register. */
3995 iemNativeRegFreeTmp(pReNative, idxEflReg);
3996
3997 return off;
3998}
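
/*
 * Conceptually (illustrative only), the emitted code is:
 *      uint32_t fEFlags = <guest EFLAGS shadow>;
 *      if (fEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;
 *      fEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      <store fEFlags back into cpum.GstCtx.eflags>;
 */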
3999
4000
4001#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4002 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4003
4004#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4005 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4006 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4007
4008/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4009DECL_INLINE_THROW(uint32_t)
4010iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4011{
4012 /* Allocate a temporary PC register. */
4013 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4014
4015 /* Perform the addition and store the result. */
4016 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4017 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4018
4019 /* Free but don't flush the PC register. */
4020 iemNativeRegFreeTmp(pReNative, idxPcReg);
4021
4022 return off;
4023}
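
/*
 * Conceptually (illustrative only): cpum.GstCtx.rip += cbInstr, done entirely
 * in the host register shadowing RIP and stored straight back; no flags are
 * touched here (the _WITH_FLAGS variants add the check above).
 */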
4024
4025
4026#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4027 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4028
4029#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4030 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4031 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4032
4033/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4034DECL_INLINE_THROW(uint32_t)
4035iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4036{
4037 /* Allocate a temporary PC register. */
4038 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4039
4040 /* Perform the addition and store the result. */
4041 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4042 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4043
4044 /* Free but don't flush the PC register. */
4045 iemNativeRegFreeTmp(pReNative, idxPcReg);
4046
4047 return off;
4048}
4049
4050
4051#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4052 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4053
4054#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4055 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4056 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4057
4058/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4059DECL_INLINE_THROW(uint32_t)
4060iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4061{
4062 /* Allocate a temporary PC register. */
4063 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4064
4065 /* Perform the addition and store the result. */
4066 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4067 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4068 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4069
4070 /* Free but don't flush the PC register. */
4071 iemNativeRegFreeTmp(pReNative, idxPcReg);
4072
4073 return off;
4074}
4075
4076
4077
4078/*********************************************************************************************************************************
4079* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4080*********************************************************************************************************************************/
4081
4082#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4083 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4084 (a_enmEffOpSize), pCallEntry->idxInstr)
4085
4086#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4087 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4088 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4089
4090#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4091 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4092 IEMMODE_16BIT, pCallEntry->idxInstr)
4093
4094#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4095 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4096 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4097
4098#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4099 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4100 IEMMODE_64BIT, pCallEntry->idxInstr)
4101
4102#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4103 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4104 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4105
4106/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4107 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4108 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4109DECL_INLINE_THROW(uint32_t)
4110iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4111 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4112{
4113 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4114
4115 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4116 off = iemNativeRegFlushPendingWrites(pReNative, off);
4117
4118 /* Allocate a temporary PC register. */
4119 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4120
4121 /* Perform the addition. */
4122 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4123
4124 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4125 {
4126 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4127 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4128 }
4129 else
4130 {
4131 /* Just truncate the result to 16-bit IP. */
4132 Assert(enmEffOpSize == IEMMODE_16BIT);
4133 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4134 }
4135 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4136
4137 /* Free but don't flush the PC register. */
4138 iemNativeRegFreeTmp(pReNative, idxPcReg);
4139
4140 return off;
4141}
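
/*
 * Conceptually (illustrative only), for a 64-bit operand size the above is:
 *      uint64_t const uNewRip = cpum.GstCtx.rip + cbInstr + offDisp;
 *      if (<uNewRip is not canonical>)
 *          raise #GP(0) and exit the TB;
 *      cpum.GstCtx.rip = uNewRip;
 * while a 16-bit operand size instead truncates uNewRip to 16 bits before the
 * store (no canonical check needed).
 */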
4142
4143
4144#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4145 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4146 (a_enmEffOpSize), pCallEntry->idxInstr)
4147
4148#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4149 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4150 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4151
4152#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4153 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4154 IEMMODE_16BIT, pCallEntry->idxInstr)
4155
4156#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4157 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4158 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4159
4160#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4161 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4162 IEMMODE_32BIT, pCallEntry->idxInstr)
4163
4164#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4165 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4166 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4167
4168/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4169 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4170 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4171DECL_INLINE_THROW(uint32_t)
4172iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4173 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4174{
4175 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4176
4177 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4178 off = iemNativeRegFlushPendingWrites(pReNative, off);
4179
4180 /* Allocate a temporary PC register. */
4181 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4182
4183 /* Perform the addition. */
4184 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4185
4186 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4187 if (enmEffOpSize == IEMMODE_16BIT)
4188 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4189
4190 /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
4191 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4192
4193 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4194
4195 /* Free but don't flush the PC register. */
4196 iemNativeRegFreeTmp(pReNative, idxPcReg);
4197
4198 return off;
4199}
4200
4201
4202#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4203 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4204
4205#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4206 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4207 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4208
4209#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4210 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4211
4212#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4213 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4214 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4215
4216#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4217 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4218
4219#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4220 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4221 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4222
4223/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4224DECL_INLINE_THROW(uint32_t)
4225iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4226 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4227{
4228 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4229 off = iemNativeRegFlushPendingWrites(pReNative, off);
4230
4231 /* Allocate a temporary PC register. */
4232 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4233
4234 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4235 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4236 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4237 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4238 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4239
4240 /* Free but don't flush the PC register. */
4241 iemNativeRegFreeTmp(pReNative, idxPcReg);
4242
4243 return off;
4244}
4245
4246
4247
4248/*********************************************************************************************************************************
4249* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4250*********************************************************************************************************************************/
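
/*
 * Note: a sketch of the native code layout these emitters cooperate to produce
 * for an IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF sequence (not emitted
 * verbatim, shown here for orientation only):
 *
 *          test <condition>
 *          jcc  Else_<seqno>       ; emitted by the IEM_MC_IF_XXX emitter
 *          ... if-block code ...
 *          jmp  Endif_<seqno>      ; emitted by iemNativeEmitElse
 *      Else_<seqno>:
 *          ... else-block code ...
 *      Endif_<seqno>:              ; defined by iemNativeEmitEndIf
 *
 * When there is no IEM_MC_ELSE, iemNativeEmitEndIf defines the Else label at
 * the same offset as the Endif label.  iemNativeEmitEndIf also reconciles the
 * register/variable state of the two paths, dropping whatever differs.
 */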
4251
4252/**
4253 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4254 *
4255 * @returns Pointer to the condition stack entry.
4256 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if nested too deeply (longjmp).
4257 */
4258DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4259{
4260 uint32_t const idxStack = pReNative->cCondDepth;
4261 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4262
4263 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4264 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4265
4266 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4267 pEntry->fInElse = false;
4268 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4269 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4270
4271 return pEntry;
4272}
4273
4274
4275/**
4276 * Start of the if-block, snapshotting the register and variable state.
4277 */
4278DECL_INLINE_THROW(void)
4279iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4280{
4281 Assert(offIfBlock != UINT32_MAX);
4282 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4283 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4284 Assert(!pEntry->fInElse);
4285
4286 /* Define the start of the IF block if requested or for disassembly purposes. */
4287 if (idxLabelIf != UINT32_MAX)
4288 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4289#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4290 else
4291 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4292#else
4293 RT_NOREF(offIfBlock);
4294#endif
4295
4296 /* Copy the initial state so we can restore it in the 'else' block. */
4297 pEntry->InitialState = pReNative->Core;
4298}
4299
4300
4301#define IEM_MC_ELSE() } while (0); \
4302 off = iemNativeEmitElse(pReNative, off); \
4303 do {
4304
4305/** Emits code related to IEM_MC_ELSE. */
4306DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4307{
4308 /* Check sanity and get the conditional stack entry. */
4309 Assert(off != UINT32_MAX);
4310 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4311 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4312 Assert(!pEntry->fInElse);
4313
4314 /* Jump to the endif */
4315 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4316
4317 /* Define the else label and enter the else part of the condition. */
4318 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4319 pEntry->fInElse = true;
4320
4321 /* Snapshot the core state so we can do a merge at the endif and restore
4322 the snapshot we took at the start of the if-block. */
4323 pEntry->IfFinalState = pReNative->Core;
4324 pReNative->Core = pEntry->InitialState;
4325
4326 return off;
4327}
4328
4329
4330#define IEM_MC_ENDIF() } while (0); \
4331 off = iemNativeEmitEndIf(pReNative, off)
4332
4333/** Emits code related to IEM_MC_ENDIF. */
4334DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4335{
4336 /* Check sanity and get the conditional stack entry. */
4337 Assert(off != UINT32_MAX);
4338 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4339 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4340
4341 /*
4342 * Now we have to find common ground between the current core state and the
4343 * one at the end of the if-block (or the initial state if there is no else):
4344 * use the smallest common denominator and just drop anything that differs.
4345 */
4346 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4347 * which is why we're doing this at the end of the else-block.
4348 * But we'd need more info about the future for that to be worth the effort. */
4349 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4350 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4351 {
4352 /* shadow guest stuff first. */
4353 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4354 if (fGstRegs)
4355 {
4356 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4357 do
4358 {
4359 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4360 fGstRegs &= ~RT_BIT_64(idxGstReg);
4361
4362 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4363 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4364 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4365 {
4366 Log12(("iemNativeEmitEndIf: dropping gst %#RX64 from hst %s\n",
4367 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4368 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4369 }
4370 } while (fGstRegs);
4371 }
4372 else
4373 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4374
4375 /* Check variables next. For now we must require them to be identical
4376 or stuff we can recreate. */
4377 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4378 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4379 if (fVars)
4380 {
4381 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4382 do
4383 {
4384 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4385 fVars &= ~RT_BIT_32(idxVar);
4386
4387 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4388 {
4389 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4390 continue;
4391 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4392 {
4393 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4394 if (idxHstReg != UINT8_MAX)
4395 {
4396 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4397 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4398 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4399 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4400 }
4401 continue;
4402 }
4403 }
4404 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4405 continue;
4406
4407 /* Irreconcilable, so drop it. */
4408 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4409 if (idxHstReg != UINT8_MAX)
4410 {
4411 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4412 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4413 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4414 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4415 }
4416 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4417 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4418 } while (fVars);
4419 }
4420
4421 /* Finally, check that the host register allocations matches. */
4422 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4423 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4424 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4425 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4426 }
4427
4428 /*
4429 * Define the endif label and maybe the else one if we're still in the 'if' part.
4430 */
4431 if (!pEntry->fInElse)
4432 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4433 else
4434 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4435 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4436
4437 /* Pop the conditional stack. */
4438 pReNative->cCondDepth -= 1;
4439
4440 return off;
4441}
4442
4443
4444#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4445 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4446 do {
4447
4448/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4449DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4450{
4451 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4452
4453 /* Get the eflags. */
4454 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4455 kIemNativeGstRegUse_ReadOnly);
4456
4457 /* Test and jump. */
4458 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4459
4460 /* Free but don't flush the EFlags register. */
4461 iemNativeRegFreeTmp(pReNative, idxEflReg);
4462
4463 /* Make a copy of the core state now as we start the if-block. */
4464 iemNativeCondStartIfBlock(pReNative, off);
4465
4466 return off;
4467}
4468
4469
4470#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4471 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4472 do {
4473
4474/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4475DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4476{
4477 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4478
4479 /* Get the eflags. */
4480 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4481 kIemNativeGstRegUse_ReadOnly);
4482
4483 /* Test and jump. */
4484 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4485
4486 /* Free but don't flush the EFlags register. */
4487 iemNativeRegFreeTmp(pReNative, idxEflReg);
4488
4489 /* Make a copy of the core state now as we start the if-block. */
4490 iemNativeCondStartIfBlock(pReNative, off);
4491
4492 return off;
4493}
4494
4495
4496#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4497 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4498 do {
4499
4500/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4501DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4502{
4503 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4504
4505 /* Get the eflags. */
4506 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4507 kIemNativeGstRegUse_ReadOnly);
4508
4509 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4510 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4511
4512 /* Test and jump. */
4513 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4514
4515 /* Free but don't flush the EFlags register. */
4516 iemNativeRegFreeTmp(pReNative, idxEflReg);
4517
4518 /* Make a copy of the core state now as we start the if-block. */
4519 iemNativeCondStartIfBlock(pReNative, off);
4520
4521 return off;
4522}
4523
4524
4525#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4526 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4527 do {
4528
4529/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4530DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4531{
4532 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4533
4534 /* Get the eflags. */
4535 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4536 kIemNativeGstRegUse_ReadOnly);
4537
4538 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4539 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4540
4541 /* Test and jump. */
4542 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4543
4544 /* Free but don't flush the EFlags register. */
4545 iemNativeRegFreeTmp(pReNative, idxEflReg);
4546
4547 /* Make a copy of the core state now as we start the if-block. */
4548 iemNativeCondStartIfBlock(pReNative, off);
4549
4550 return off;
4551}
4552
4553
4554#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4555 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4556 do {
4557
4558#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4559 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4560 do {
4561
4562/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4563DECL_INLINE_THROW(uint32_t)
4564iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4565 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4566{
4567 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4568
4569 /* Get the eflags. */
4570 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4571 kIemNativeGstRegUse_ReadOnly);
4572
4573 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4574 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4575
4576 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4577 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4578 Assert(iBitNo1 != iBitNo2);
4579
4580#ifdef RT_ARCH_AMD64
4581 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4582
4583 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4584 if (iBitNo1 > iBitNo2)
4585 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4586 else
4587 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4588 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4589
4590#elif defined(RT_ARCH_ARM64)
4591 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4592 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4593
4594 /* and tmpreg, eflreg, #1<<iBitNo1 */
4595 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4596
4597 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4598 if (iBitNo1 > iBitNo2)
4599 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4600 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4601 else
4602 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4603 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4604
4605 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4606
4607#else
4608# error "Port me"
4609#endif
4610
4611 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4612 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4613 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4614
4615 /* Free but don't flush the EFlags and tmp registers. */
4616 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4617 iemNativeRegFreeTmp(pReNative, idxEflReg);
4618
4619 /* Make a copy of the core state now as we start the if-block. */
4620 iemNativeCondStartIfBlock(pReNative, off);
4621
4622 return off;
4623}
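
/*
 * Worked example of the bit comparison trick above (sketch): take iBitNo1 = 7
 * and iBitNo2 = 11 (e.g. X86_EFL_SF vs X86_EFL_OF).  The emitted code computes
 *
 *      tmp  = efl & RT_BIT_32(7);      // isolate the first flag
 *      tmp <<= 11 - 7;                 // line it up with the second flag
 *      tmp ^= efl;                     // bit 11 of tmp = SF ^ OF
 *
 * so bit iBitNo2 of tmpreg is set exactly when the two flags differ.  The
 * final test then jumps to the else label when they differ (fInverted=false,
 * IEM_MC_IF_EFL_BITS_EQ) or when they are equal (fInverted=true, _NE).
 */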
4624
4625
4626#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4627 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4628 do {
4629
4630#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4631 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4632 do {
4633
4634/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4635 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4636DECL_INLINE_THROW(uint32_t)
4637iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4638 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4639{
4640 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4641
4642 /* We need an if-block label for the non-inverted variant. */
4643 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4644 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4645
4646 /* Get the eflags. */
4647 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4648 kIemNativeGstRegUse_ReadOnly);
4649
4650 /* Translate the flag masks to bit numbers. */
4651 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4652 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4653
4654 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4655 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4656 Assert(iBitNo1 != iBitNo);
4657
4658 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4659 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4660 Assert(iBitNo2 != iBitNo);
4661 Assert(iBitNo2 != iBitNo1);
4662
4663#ifdef RT_ARCH_AMD64
4664 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4665#elif defined(RT_ARCH_ARM64)
4666 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4667#endif
4668
4669 /* Check for the lone bit first. */
4670 if (!fInverted)
4671 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4672 else
4673 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4674
4675 /* Then extract and compare the other two bits. */
4676#ifdef RT_ARCH_AMD64
4677 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4678 if (iBitNo1 > iBitNo2)
4679 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4680 else
4681 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4682 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4683
4684#elif defined(RT_ARCH_ARM64)
4685 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4686
4687 /* and tmpreg, eflreg, #1<<iBitNo1 */
4688 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4689
4690 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4691 if (iBitNo1 > iBitNo2)
4692 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4693 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4694 else
4695 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4696 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4697
4698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4699
4700#else
4701# error "Port me"
4702#endif
4703
4704 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4705 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4706 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4707
4708 /* Free but don't flush the EFlags and tmp registers. */
4709 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4710 iemNativeRegFreeTmp(pReNative, idxEflReg);
4711
4712 /* Make a copy of the core state now as we start the if-block. */
4713 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4714
4715 return off;
4716}
4717
4718
4719#define IEM_MC_IF_CX_IS_NZ() \
4720 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4721 do {
4722
4723/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4724DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4725{
4726 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4727
4728 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4729 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4730 kIemNativeGstRegUse_ReadOnly);
4731 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4732 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4733
4734 iemNativeCondStartIfBlock(pReNative, off);
4735 return off;
4736}
4737
4738
4739#define IEM_MC_IF_ECX_IS_NZ() \
4740 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4741 do {
4742
4743#define IEM_MC_IF_RCX_IS_NZ() \
4744 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4745 do {
4746
4747/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4748DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4749{
4750 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4751
4752 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4753 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4754 kIemNativeGstRegUse_ReadOnly);
4755 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4756 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4757
4758 iemNativeCondStartIfBlock(pReNative, off);
4759 return off;
4760}
4761
4762
4763#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4764 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4765 do {
4766
4767#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4768 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4769 do {
4770
4771/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4772DECL_INLINE_THROW(uint32_t)
4773iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4774{
4775 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4776
4777 /* We have to load both RCX and EFLAGS before we can start branching,
4778 otherwise we'll end up in the else-block with an inconsistent
4779 register allocator state.
4780 Doing EFLAGS first as it's more likely to be loaded, right? */
4781 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4782 kIemNativeGstRegUse_ReadOnly);
4783 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4784 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4785 kIemNativeGstRegUse_ReadOnly);
4786
4787 /** @todo we could reduce this to a single branch instruction by spending a
4788 * temporary register and some setnz stuff. Not sure if loops are
4789 * worth it. */
4790 /* Check CX. */
4791 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4792
4793 /* Check the EFlags bit. */
4794 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4795 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4796 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4797 !fCheckIfSet /*fJmpIfSet*/);
4798
4799 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4800 iemNativeRegFreeTmp(pReNative, idxEflReg);
4801
4802 iemNativeCondStartIfBlock(pReNative, off);
4803 return off;
4804}
4805
4806
4807#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4808 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
4809 do {
4810
4811#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4812 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
4813 do {
4814
4815#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4816 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
4817 do {
4818
4819#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4820 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
4821 do {
4822
4823/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
4824 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
4825 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
4826 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4827DECL_INLINE_THROW(uint32_t)
4828iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4829 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
4830{
4831 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4832
4833 /* We have to load both RCX and EFLAGS before we can start branching,
4834 otherwise we'll end up in the else-block with an inconsistent
4835 register allocator state.
4836 Doing EFLAGS first as it's more likely to be loaded, right? */
4837 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4838 kIemNativeGstRegUse_ReadOnly);
4839 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4840 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4841 kIemNativeGstRegUse_ReadOnly);
4842
4843 /** @todo we could reduce this to a single branch instruction by spending a
4844 * temporary register and some setnz stuff. Not sure if loops are
4845 * worth it. */
4846 /* Check RCX/ECX. */
4847 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4848
4849 /* Check the EFlags bit. */
4850 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4851 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4852 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4853 !fCheckIfSet /*fJmpIfSet*/);
4854
4855 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4856 iemNativeRegFreeTmp(pReNative, idxEflReg);
4857
4858 iemNativeCondStartIfBlock(pReNative, off);
4859 return off;
4860}
4861
4862
4863
4864/*********************************************************************************************************************************
4865* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
4866*********************************************************************************************************************************/
4867/** Number of hidden arguments for CIMPL calls.
4868 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows/AMD64. */
4869#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4870# define IEM_CIMPL_HIDDEN_ARGS 3
4871#else
4872# define IEM_CIMPL_HIDDEN_ARGS 2
4873#endif
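
/* Note: These correspond to what iemNativeEmitCallCImplCommon loads just before
   the call: the pVCpu pointer and cbInstr, plus - when VBOXSTRICTRC is returned
   via a hidden parameter (Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED) - a
   pointer to a stack slot receiving the VBOXSTRICTRC value, hence three hidden
   arguments instead of two in that configuration. */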
4874
4875#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
4876 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
4877
4878#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
4879 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
4880
4881#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_iArg) \
4882 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
4883
4884#define IEM_MC_LOCAL(a_Type, a_Name) \
4885 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
4886
4887#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
4888 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
4889
4890
4891/**
4892 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
4893 */
4894DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
4895{
4896 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
4897 return IEM_CIMPL_HIDDEN_ARGS;
4898 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
4899 return 1;
4900 return 0;
4901}
4902
4903
4904/**
4905 * Internal work that allocates a variable with kind set to
4906 * kIemNativeVarKind_Invalid and no current stack allocation.
4907 *
4908 * The kind will either be set by the caller or later when the variable is first
4909 * assigned a value.
4910 */
4911static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
4912{
4913 Assert(cbType > 0 && cbType <= 64);
4914 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
4915 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
4916 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
4917 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
4918 pReNative->Core.aVars[idxVar].cbVar = cbType;
4919 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
4920 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4921 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
4922 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
4923 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
4924 pReNative->Core.aVars[idxVar].u.uValue = 0;
4925 return idxVar;
4926}
4927
4928
4929/**
4930 * Internal work that allocates an argument variable w/o setting enmKind.
4931 */
4932static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
4933{
4934 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
4935 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
4936 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
4937
4938 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
4939 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
4940 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
4941 return idxVar;
4942}
4943
4944
4945/**
4946 * Changes the variable to a stack variable.
4947 *
4948 * Currently this is only possible to do the first time the variable is used;
4949 * switching later can be implemented but hasn't been done.
4950 *
4951 * @param pReNative The recompiler state.
4952 * @param idxVar The variable.
4953 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
4954 */
4955static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4956{
4957 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
4958 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4959 {
4960 /* We could in theory transition from immediate to stack as well, but it
4961 would involve the caller doing work storing the value on the stack. So,
4962 till that's required we only allow transition from invalid. */
4963 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
4964 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
4965 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
4966
4967 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
4968 {
4969 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
4970 {
4971 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
4972 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4973 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
4974 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
4975 return;
4976 }
4977 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;*/
4978 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
4979 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
4980 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
4981 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
4982 uint32_t bmStack = ~pReNative->Core.bmStack;
4983 while (bmStack != UINT32_MAX)
4984 {
4985 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
4986 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4987 if (!(iSlot & fBitAlignMask))
4988 {
4989 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
4990 {
4991 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
4992 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
4993 return;
4994 }
4995 }
4996 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
4997 }
4998 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4999 }
5000 }
5001}
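
/*
 * Worked example for the multi-slot path above (sketch): a 32 byte variable
 * yields fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = 3 and
 * fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, so the loop searches for
 * four consecutive free 8-byte stack slots starting at a slot index that is a
 * multiple of four.
 */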
5002
5003
5004/**
5005 * Changes it to a variable with a constant value.
5006 *
5007 * This does not require stack storage as we know the value and can always
5008 * reload it, unless of course it's referenced.
5009 *
5010 * @param pReNative The recompiler state.
5011 * @param idxVar The variable.
5012 * @param uValue The immediate value.
5013 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5014 */
5015static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5016{
5017 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5018 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5019 {
5020 /* Only simple transitions for now. */
5021 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5022 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5023 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5024 }
5025 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5026}
5027
5028
5029/**
5030 * Changes the variable to a reference (pointer) to @a idxOtherVar.
5031 *
5032 * @param pReNative The recompiler state.
5033 * @param idxVar The variable.
5034 * @param idxOtherVar The variable to take the (stack) address of.
5035 *
5036 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5037 */
5038static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5039{
5040 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5041 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5042
5043 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5044 {
5045 /* Only simple transitions for now. */
5046 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5047 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5048 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5049 }
5050 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5051
5052 /* Update the other variable, ensure it's a stack variable. */
5053 /** @todo handle variables with const values... that'll go boom now. */
5054 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5055 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5056}
5057
5058
5059DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5060{
5061 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5062}
5063
5064
5065DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5066{
5067 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5068 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5069 return idxVar;
5070}
5071
5072
5073DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5074{
5075 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5076 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5077 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5078 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5079
5080 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5081 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5082 return idxArgVar;
5083}
5084
5085
5086DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5087{
5088 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5089 iemNativeVarSetKindToStack(pReNative, idxVar);
5090 return idxVar;
5091}
5092
5093
5094DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5095{
5096 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5097 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5098 return idxVar;
5099}
5100
5101
5102/**
5103 * Makes sure variable @a idxVar has a register assigned to it.
5104 *
5105 * @returns The host register number.
5106 * @param pReNative The recompiler state.
5107 * @param idxVar The variable.
5108 * @param poff Pointer to the instruction buffer offset.
5109 * In case a register needs to be freed up.
5110 */
5111DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5112{
5113 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5114
5115 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5116 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5117 return idxReg;
5118
5119 /*
5120 * We have to allocate a register for the variable, even if it's a stack one,
5121 * as we don't know if there are modifications being made to it before it's
5122 * finalized (todo: analyze and insert hints about that?).
5123 *
5124 * If we can, we try to get the correct register for argument variables. This
5125 * assumes that most argument variables are fetched as close as possible
5126 * to the actual call, so that there aren't any interfering hidden calls
5127 * (memory accesses, etc) in between.
5128 *
5129 * If we cannot, or it's a local variable rather than an argument, we make
5130 * sure no argument registers that will be used by this MC block are
5131 * allocated here, and we always prefer non-volatile registers to avoid
5132 * having to spill stuff for internal calls.
5133 */
5134 /** @todo Detect too early argument value fetches and warn about hidden
5135 * calls causing less optimal code to be generated in the python script. */
5136
5137 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5138 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5139 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5140 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5141 else
5142 {
5143 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5144 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5145 & ~pReNative->Core.bmHstRegsWithGstShadow
5146 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5147 & fNotArgsMask;
5148 if (fRegs)
5149 {
5150 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
5151 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5152 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5153 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5154 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5155 }
5156 else
5157 {
5158 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5159 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5160 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5161 }
5162 }
5163 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5164 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5165 return idxReg;
5166}
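
/*
 * Example of the preference order above (sketch): an argument variable with
 * uArgNo=2 is first tried in g_aidxIemNativeCallRegs[2] - i.e. the third C
 * calling convention argument register, assuming the usual ABI mapping (RDX on
 * SysV AMD64, R8 on Windows/AMD64, x2 on ARM64).  Only if that register is
 * already in use does it fall back to a free non-argument register, preferring
 * the non-volatile ones, and finally to iemNativeRegAllocFindFree which may
 * have to spill something.
 */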
5167
5168
5169
5170/*********************************************************************************************************************************
5171* Emitters for IEM_MC_CALL_CIMPL_XXX *
5172*********************************************************************************************************************************/
5173
5174/**
5175 * Emits code to load a reference to the given guest register into @a idxGprDst.
5176 */
5177DECL_INLINE_THROW(uint32_t)
5178iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5179 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5180{
5181 /*
5182 * Get the offset relative to the CPUMCTX structure.
5183 */
5184 uint32_t offCpumCtx;
5185 switch (enmClass)
5186 {
5187 case kIemNativeGstRegRef_Gpr:
5188 Assert(idxRegInClass < 16);
5189 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5190 break;
5191
5192 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
5193 Assert(idxRegInClass < 4);
5194 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5195 break;
5196
5197 case kIemNativeGstRegRef_EFlags:
5198 Assert(idxRegInClass == 0);
5199 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5200 break;
5201
5202 case kIemNativeGstRegRef_MxCsr:
5203 Assert(idxRegInClass == 0);
5204 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5205 break;
5206
5207 case kIemNativeGstRegRef_FpuReg:
5208 Assert(idxRegInClass < 8);
5209 AssertFailed(); /** @todo what kind of indexing? */
5210 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5211 break;
5212
5213 case kIemNativeGstRegRef_MReg:
5214 Assert(idxRegInClass < 8);
5215 AssertFailed(); /** @todo what kind of indexing? */
5216 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5217 break;
5218
5219 case kIemNativeGstRegRef_XReg:
5220 Assert(idxRegInClass < 16);
5221 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5222 break;
5223
5224 default:
5225 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5226 }
5227
5228 /*
5229 * Load the value into the destination register.
5230 */
5231#ifdef RT_ARCH_AMD64
5232 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5233
5234#elif defined(RT_ARCH_ARM64)
5235 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5236 Assert(offCpumCtx < 4096);
5237 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5238
5239#else
5240# error "Port me!"
5241#endif
5242
5243 return off;
5244}
5245
5246
5247/**
5248 * Common code for CIMPL and AIMPL calls.
5249 *
5250 * These are calls that use argument variables and such. They should not be
5251 * confused with internal calls required to implement an MC operation,
5252 * like a TLB load and similar.
5253 *
5254 * Upon return all that is left to do is to load any hidden arguments and
5255 * perform the call. All argument variables are freed.
5256 *
5257 * @returns New code buffer offset; throws VBox status code on error.
5258 * @param pReNative The native recompile state.
5259 * @param off The code buffer offset.
5260 * @param cArgs The total number of arguments (includes hidden
5261 * count).
5262 * @param cHiddenArgs The number of hidden arguments. The hidden
5263 * arguments must not have any variable declared for
5264 * them, whereas all the regular arguments must
5265 * (tstIEMCheckMc ensures this).
5266 */
5267DECL_HIDDEN_THROW(uint32_t)
5268iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5269{
5270#ifdef VBOX_STRICT
5271 /*
5272 * Assert sanity.
5273 */
5274 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5275 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5276 for (unsigned i = 0; i < cHiddenArgs; i++)
5277 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5278 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5279 {
5280 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5281 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5282 }
5283#endif
5284
5285 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
5286
5287 /*
5288 * First, go over the host registers that will be used for arguments and make
5289 * sure they either hold the desired argument or are free.
5290 */
5291 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
5292 for (uint32_t i = 0; i < cRegArgs; i++)
5293 {
5294 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5295 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5296 {
5297 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
5298 {
5299 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
5300 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5301 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
5302 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5303 if (uArgNo == i)
5304 { /* perfect */ }
5305 else
5306 {
5307 /* The variable allocator logic should make sure this is impossible. */
5308 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
5309
5310 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5311 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5312 else
5313 {
5314 /* just free it, can be reloaded if used again */
5315 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5316 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
5317 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
5318 }
5319 }
5320 }
5321 else
5322 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
5323 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
5324 }
5325 }
5326
5327 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
5328
5329 /*
5330 * Make sure the argument variables are loaded into their respective registers.
5331 *
5332 * We can optimize this by ASSUMING that any register allocations are for
5333 * registers that have already been loaded and are ready. The previous step
5334 * saw to that.
5335 */
5336 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
5337 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5338 {
5339 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5340 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5341 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
5342 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
5343 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
5344 else
5345 {
5346 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5347 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5348 {
5349 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5350 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
5351 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
5352 | RT_BIT_32(idxArgReg);
5353 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
5354 }
5355 else
5356 {
5357 /* Use ARG0 as temp for stuff we need registers for. */
5358 switch (pReNative->Core.aVars[idxVar].enmKind)
5359 {
5360 case kIemNativeVarKind_Stack:
5361 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5362 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5363 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg,
5364 IEMNATIVE_FP_OFF_STACK_VARS
5365 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5366 continue;
5367
5368 case kIemNativeVarKind_Immediate:
5369 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
5370 continue;
5371
5372 case kIemNativeVarKind_VarRef:
5373 {
5374 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5375 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5376 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5377 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5378 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
5379 IEMNATIVE_FP_OFF_STACK_VARS
5380 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5381 continue;
5382 }
5383
5384 case kIemNativeVarKind_GstRegRef:
5385 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
5386 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5387 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5388 continue;
5389
5390 case kIemNativeVarKind_Invalid:
5391 case kIemNativeVarKind_End:
5392 break;
5393 }
5394 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5395 }
5396 }
5397 }
5398#ifdef VBOX_STRICT
5399 else
5400 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5401 {
5402 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
5403 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
5404 }
5405#endif
5406
5407#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
5408 /*
5409 * If there are any stack arguments, make sure they are in their place as well.
5410 *
5411 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since the
5412 * caller will load it later and it must be free (see the first loop).
5413 */
5414 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
5415 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
5416 {
5417 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5418 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
5419 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5420 {
5421 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
5422 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
5423 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
5424 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5425 }
5426 else
5427 {
5428 /* Use ARG0 as temp for stuff we need registers for. */
5429 switch (pReNative->Core.aVars[idxVar].enmKind)
5430 {
5431 case kIemNativeVarKind_Stack:
5432 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5433 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5434 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
5435 IEMNATIVE_FP_OFF_STACK_VARS
5436 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5437 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5438 continue;
5439
5440 case kIemNativeVarKind_Immediate:
5441 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
5442 continue;
5443
5444 case kIemNativeVarKind_VarRef:
5445 {
5446 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5447 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5448 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5449 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5450 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5451 IEMNATIVE_FP_OFF_STACK_VARS
5452 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5453 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5454 continue;
5455 }
5456
5457 case kIemNativeVarKind_GstRegRef:
5458 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5459 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5460 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5461 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5462 continue;
5463
5464 case kIemNativeVarKind_Invalid:
5465 case kIemNativeVarKind_End:
5466 break;
5467 }
5468 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5469 }
5470 }
5471#else
5472 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
5473#endif
5474
5475 /*
5476 * Free all argument variables (simplified).
5477 * Their lifetime always expires with the call they are for.
5478 */
5479 /** @todo Make the python script check that arguments aren't used after
5480 * IEM_MC_CALL_XXXX. */
5481 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
5482 {
5483 uint8_t idxVar = pReNative->Core.aidxArgVars[i];
5484 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5485 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
5486 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5487 }
5488 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5489
5490 /*
5491 * Flush volatile registers as we make the call.
5492 */
5493 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
5494
5495 return off;
5496}
5497
5498
5499/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
5500DECL_HIDDEN_THROW(uint32_t)
5501iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5502 uintptr_t pfnCImpl, uint8_t cArgs)
5503
5504{
5505 /*
5506 * Do all the call setup and cleanup.
5507 */
5508 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
5509
5510 /*
5511 * Load the two hidden arguments.
5512 */
5513#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5514 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5515 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5516 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
5517#else
5518 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5519 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
5520#endif
5521
5522 /*
5523 * Make the call and check the return code.
5524 */
5525 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
5526#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5527 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5528#endif
5529 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5530}
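
/*
 * For reference, a rough sketch of what the code emitted above ends up calling
 * in the common (non-strict-rc) case; explicit arguments, if any, follow the
 * two hidden ones (names illustrative):
 *
 *      VBOXSTRICTRC rcStrict = pfnCImpl(pVCpu, cbInstr /*, explicit args... */);
 *
 * The return code then goes through the same checking as any other call, see
 * iemNativeEmitCheckCallRetAndPassUp. The strict-rc Windows/AMD64 variant
 * instead passes a pointer to a VBOXSTRICTRC buffer in the shadow argument
 * area as the first argument, shifts pVCpu and cbInstr up by one, and reloads
 * the status from that buffer after the call.
 */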
5531
5532
5533#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
5534 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0)
5535
5536/** Emits code for IEM_MC_CALL_CIMPL_1. */
5537DECL_INLINE_THROW(uint32_t)
5538iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5539 uintptr_t pfnCImpl, uint8_t idxArg0)
5540{
5541 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5542 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5543 RT_NOREF_PV(idxArg0);
5544
5545 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 1);
5546}
5547
5548
5549#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
5550 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1)
5551
5552/** Emits code for IEM_MC_CALL_CIMPL_2. */
5553DECL_INLINE_THROW(uint32_t)
5554iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5555 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
5556{
5557 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5558 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5559 RT_NOREF_PV(idxArg0);
5560
5561 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5562 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5563 RT_NOREF_PV(idxArg1);
5564
5565 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 2);
5566}
5567
5568
5569#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
5570 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2)
5571
5572/** Emits code for IEM_MC_CALL_CIMPL_3. */
5573DECL_INLINE_THROW(uint32_t)
5574iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5575 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
5576{
5577pReNative->pInstrBuf[off++] = 0xcc; /* int3 - temporary debugging aid */
5578 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5579 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5580 RT_NOREF_PV(idxArg0);
5581
5582 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5583 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5584 RT_NOREF_PV(idxArg1);
5585
5586 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5587 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5588 RT_NOREF_PV(idxArg2);
5589
5590 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 3);
5591}
5592
5593
5594#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2, a3) \
5595 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
5596
5597/** Emits code for IEM_MC_CALL_CIMPL_4. */
5598DECL_INLINE_THROW(uint32_t)
5599iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5600 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
5601{
5602pReNative->pInstrBuf[off++] = 0xcc; /* int3 - temporary debugging aid */
5603 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5604 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5605 RT_NOREF_PV(idxArg0);
5606
5607 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5608 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5609 RT_NOREF_PV(idxArg1);
5610
5611 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5612 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5613 RT_NOREF_PV(idxArg2);
5614
5615 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5616 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5617 RT_NOREF_PV(idxArg3);
5618
5619 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 4);
5620}
5621
5622
5623#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2, a3, a4) \
5624 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
5625
5626/** Emits code for IEM_MC_CALL_CIMPL_5. */
5627DECL_INLINE_THROW(uint32_t)
5628iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5629 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
5630{
5631pReNative->pInstrBuf[off++] = 0xcc; /* int3 - temporary debugging aid */
5632 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5633 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5634 RT_NOREF_PV(idxArg0);
5635
5636 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5637 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5638 RT_NOREF_PV(idxArg1);
5639
5640 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5641 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5642 RT_NOREF_PV(idxArg2);
5643
5644 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5645 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5646 RT_NOREF_PV(idxArg3);
5647
5648 Assert(idxArg4 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg4)));
5649 Assert(pReNative->Core.aVars[idxArg4].uArgNo == 4 + IEM_CIMPL_HIDDEN_ARGS);
5650 RT_NOREF_PV(idxArg4);
5651
5652 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 5);
5653}
5654
5655
5656
5657/*********************************************************************************************************************************
5658* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
5659*********************************************************************************************************************************/
5660
5661#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
5662 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
5663
5664/** Emits code for IEM_MC_FETCH_GREG_U16. */
5665DECL_INLINE_THROW(uint32_t)
5666iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
5667{
5668 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
5669 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
5670
5671 /*
5672 * We can either just load the low 16 bits of the GPR into a host register
5673 * for the variable, or we can do so via a shadow copy host register. The
5674 * latter will avoid having to reload it if it's being stored later, but
5675 * will waste a host register if it isn't touched again. Since we don't
5676 * know what's going to happen, we choose the latter for now.
5677 */
5678 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5679 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5680 kIemNativeGstRegUse_ReadOnly);
5681
5682 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5683 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
5684 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
5685
5686 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
5687 return off;
5688}
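
/*
 * Usage sketch (illustrative, assuming a typical MC block produced by the
 * threaded-function python script; the local name is made up):
 *
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      IEM_MC_FETCH_GREG_U16(u16Value, X86_GREG_xBX);
 *
 * The local becomes a stack-kind variable whose host register gets loaded
 * with the low 16 bits of the guest GPR's shadow copy.
 */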
5689
5690
5691
5692/*********************************************************************************************************************************
5693* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
5694*********************************************************************************************************************************/
5695
5696#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
5697 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
5698
5699/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
5700DECL_INLINE_THROW(uint32_t)
5701iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
5702{
5703 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5704 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + (iGRegEx & 15)),
5705 kIemNativeGstRegUse_ForUpdate);
5706#ifdef RT_ARCH_AMD64
5707 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5708
5709 /* To the lowest byte of the register: mov r8, imm8 */
5710 if (iGRegEx < 16)
5711 {
5712 if (idxGstTmpReg >= 8)
5713 pbCodeBuf[off++] = X86_OP_REX_B;
5714 else if (idxGstTmpReg >= 4)
5715 pbCodeBuf[off++] = X86_OP_REX;
5716 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5717 pbCodeBuf[off++] = u8Value;
5718 }
5719 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise we rotate. */
5720 else if (idxGstTmpReg < 4)
5721 {
5722 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
5723 pbCodeBuf[off++] = u8Value;
5724 }
5725 else
5726 {
5727 /* ror reg64, 8 */
5728 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5729 pbCodeBuf[off++] = 0xc1;
5730 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5731 pbCodeBuf[off++] = 8;
5732
5733 /* mov reg8, imm8 */
5734 if (idxGstTmpReg >= 8)
5735 pbCodeBuf[off++] = X86_OP_REX_B;
5736 else if (idxGstTmpReg >= 4)
5737 pbCodeBuf[off++] = X86_OP_REX;
5738 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5739 pbCodeBuf[off++] = u8Value;
5740
5741 /* rol reg64, 8 */
5742 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5743 pbCodeBuf[off++] = 0xc1;
5744 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5745 pbCodeBuf[off++] = 8;
5746 }
5747
5748#elif defined(RT_ARCH_ARM64)
5749 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
5750 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5751 if (iGRegEx < 16)
5752 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
5753 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
5754 else
5755 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
5756 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
5757 iemNativeRegFreeTmp(pReNative, idxImmReg);
5758
5759#else
5760# error "Port me!"
5761#endif
5762
5763 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5764
5765 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
5766
5767 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5768 return off;
5769}
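
/*
 * For the high byte case (ah/ch/dh/bh) where the shadow register cannot be
 * addressed as a legacy high-byte register, the AMD64 sequence emitted above
 * amounts to (illustrative, assuming the shadow copy lives in r10):
 *
 *      ror r10, 8
 *      mov r10b, imm8
 *      rol r10, 8
 *
 * On ARM64 a single BFI into bits 15:8 does the same job.
 */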
5770
5771
5772/*
5773 * General purpose register manipulation (add, sub).
5774 */
5775
5776#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5777 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5778
5779/** Emits code for IEM_MC_SUB_GREG_U16. */
5780DECL_INLINE_THROW(uint32_t)
5781iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5782{
5783 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5784 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5785 kIemNativeGstRegUse_ForUpdate);
5786
5787#ifdef RT_ARCH_AMD64
5788 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6); /* worst case: 66 REX 81 /5 iw = 6 bytes */
5789 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5790 if (idxGstTmpReg >= 8)
5791 pbCodeBuf[off++] = X86_OP_REX_B;
5792 if (uSubtrahend == 1)
5793 {
5794 pbCodeBuf[off++] = 0xff; /* dec */
5795 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5796 }
5797 else
5798 {
5799 pbCodeBuf[off++] = 0x81;
5800 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5801 pbCodeBuf[off++] = uSubtrahend;
5802 pbCodeBuf[off++] = 0;
5803 }
5804
5805#else
5806 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5807 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5808
5809 /* sub tmp, gstgrp, uSubtrahend */
5810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5811
5812 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5813 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5814
5815 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5816#endif
5817
5818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5819
5820 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5821
5822 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5823 return off;
5824}
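
/*
 * Encoding example (illustrative): IEM_MC_SUB_GREG_U16(X86_GREG_xSP, 2) with
 * the shadow copy in, say, rbx comes out as
 *
 *      66 81 eb 02 00      sub bx, 2
 *
 * leaving bits 63:16 of the shadow register untouched, after which the full
 * 64-bit register is written back to cpum.GstCtx.aGRegs[X86_GREG_xSP].
 */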
5825
5826
5827#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5828 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5829
5830#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5831 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5832
5833/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5834DECL_INLINE_THROW(uint32_t)
5835iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5836{
5837 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5838 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5839 kIemNativeGstRegUse_ForUpdate);
5840
5841#ifdef RT_ARCH_AMD64
5842 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7); /* worst case: REX 81 /5 id = 7 bytes */
5843 if (f64Bit)
5844 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5845 else if (idxGstTmpReg >= 8)
5846 pbCodeBuf[off++] = X86_OP_REX_B;
5847 if (uSubtrahend == 1)
5848 {
5849 /* dec */
5850 pbCodeBuf[off++] = 0xff;
5851 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5852 }
5853 else if (uSubtrahend < 128)
5854 {
5855 pbCodeBuf[off++] = 0x83; /* sub */
5856 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5857 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5858 }
5859 else
5860 {
5861 pbCodeBuf[off++] = 0x81; /* sub */
5862 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5863 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5864 pbCodeBuf[off++] = 0;
5865 pbCodeBuf[off++] = 0;
5866 pbCodeBuf[off++] = 0;
5867 }
5868
5869#else
5870 /* sub tmp, gstgrp, uSubtrahend */
5871 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5872 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5873
5874#endif
5875
5876 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5877
5878 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5879
5880 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5881 return off;
5882}
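
/*
 * Note (illustrative): for the 32-bit variant the AMD64 code above uses a
 * 32-bit operand size, e.g. "41 ff c8" (dec r8d) when the shadow copy is in
 * r8 and the constant is 1, which zero-extends the result into bits 63:32 -
 * matching the architectural behaviour of 32-bit GPR writes in long mode.
 * The ARM64 32-bit sub does the same.
 */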
5883
5884
5885
5886/*********************************************************************************************************************************
5887* Builtin functions *
5888*********************************************************************************************************************************/
5889
5890/**
5891 * Built-in function that calls a C-implementation function taking zero arguments.
5892 */
5893static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
5894{
5895 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
5896 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
5897 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
5898}
5899
5900
5901/**
5902 * Built-in function that checks for pending interrupts that can be delivered or
5903 * forced action flags.
5904 *
5905 * This triggers after the completion of an instruction, so EIP is already at
5906 * the next instruction. If an IRQ or important FF is pending, this will return
5907 * a non-zero status that stops TB execution.
5908 */
5909static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
5910{
5911 RT_NOREF(pCallEntry);
5912
5913 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
5914 and I'm too lazy to create a 'Fixed' version of that one. */
5915 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
5916 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
5917
5918 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
5919
5920 /* Again, we need to load the extended EFLAGS before we actually need them
5921 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
5922 loaded them inside the check, as the shadow state would not be correct
5923 when the code branches before the load. Ditto PC. */
5924 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5925 kIemNativeGstRegUse_ReadOnly);
5926
5927 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
5928
5929 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5930
5931 /*
5932 * Start by checking the local forced actions of the EMT we're on for IRQs
5933 * and other FFs that need servicing.
5934 */
5935 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
5936 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
5937 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
5938 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5939 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
5940 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
5941 | VMCPU_FF_TLB_FLUSH
5942 | VMCPU_FF_UNHALT ),
5943 true /*fSetFlags*/);
5944 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
5945 uint32_t const offFixupJumpToVmCheck1 = off;
5946 off = iemNativeEmitJzToFixed(pReNative, off, 0);
5947
5948 /* Some relevant FFs are set, but if it's only the APIC and/or PIC being set,
5949 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
5950 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5951 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
5952 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
5953 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
5954
5955 /* So, it's only interrupt related FFs and we need to see if IRQs are being
5956 suppressed by the CPU or not. */
5957 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
5958 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
5959 idxLabelReturnBreak);
5960
5961 /* We've got shadow flags set, so we must check that the PC they are valid
5962 for matches our current PC value. */
5963 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
5964 * a register. */
5965 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
5966 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
5967
5968 /*
5969 * Now check the force flags of the VM.
5970 */
5971 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
5972 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
5973 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
5974 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
5975 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
5976 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
5977
5978 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
5979
5980 /*
5981 * We're good, no IRQs or FFs pending.
5982 */
5983 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5984 iemNativeRegFreeTmp(pReNative, idxEflReg);
5985 iemNativeRegFreeTmp(pReNative, idxPcReg);
5986
5987 return off;
5988}
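
/*
 * Roughly what the emitted code above boils down to (an illustrative sketch;
 * field names taken from the structure offsets used above):
 *
 *      uint64_t fFlags = pVCpu->fLocalForcedActions & VMCPU_FF_ALL_MASK
 *                      & ~(VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_TLB_FLUSH | VMCPU_FF_UNHALT);
 *      if (fFlags)
 *      {
 *          if (fFlags & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
 *              return VINF_IEM_REEXEC_BREAK;
 *          if (   (pVCpu->cpum.GstCtx.eflags.uBoth & X86_EFL_IF)
 *              && (   !(pVCpu->cpum.GstCtx.eflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
 *                  || pVCpu->cpum.GstCtx.uRipInhibitInt != pVCpu->cpum.GstCtx.rip))
 *              return VINF_IEM_REEXEC_BREAK;
 *      }
 *      if (pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions & VM_FF_ALL_MASK)
 *          return VINF_IEM_REEXEC_BREAK;
 */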
5989
5990
5991/**
5992 * Built-in function checks if IEMCPU::fExec has the expected value.
5993 */
5994static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
5995{
5996 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
5997 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5998
5999 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6000 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
6001 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
6002 kIemNativeLabelType_ReturnBreak);
6003 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6004 return off;
6005}
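
/*
 * C equivalent of the check emitted above (illustrative):
 *
 *      if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
 *          return VINF_IEM_REEXEC_BREAK;
 */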
6006
6007
6008
6009/*********************************************************************************************************************************
6010* The native code generator functions for each MC block. *
6011*********************************************************************************************************************************/
6012
6013
6014/*
6015 * Include g_apfnIemNativeRecompileFunctions and associated functions.
6016 *
6017 * This should probably live in its own file later, but let's see what the
6018 * compile times turn out to be first.
6019 */
6020#include "IEMNativeFunctions.cpp.h"
6021
6022
6023
6024/*********************************************************************************************************************************
6025* Recompiler Core. *
6026*********************************************************************************************************************************/
6027
6028
6029/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
6030static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
6031{
6032 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
6033 pDis->cbCachedInstr += cbMaxRead;
6034 RT_NOREF(cbMinRead);
6035 return VERR_NO_DATA;
6036}
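
/*
 * The dummy is sufficient here because all the opcode bytes we have are handed
 * to the disassembler up front via DISInstrWithPrefetchedBytes; the callback
 * only zero pads the remainder and reports that no further data is available.
 */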
6037
6038
6039/**
6040 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
6041 * @returns pszBuf.
6042 * @param fFlags The flags.
6043 * @param pszBuf The output buffer.
6044 * @param cbBuf The output buffer size. At least 32 bytes.
6045 */
6046DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
6047{
6048 Assert(cbBuf >= 32);
6049 static RTSTRTUPLE const s_aModes[] =
6050 {
6051 /* [00] = */ { RT_STR_TUPLE("16BIT") },
6052 /* [01] = */ { RT_STR_TUPLE("32BIT") },
6053 /* [02] = */ { RT_STR_TUPLE("!2!") },
6054 /* [03] = */ { RT_STR_TUPLE("!3!") },
6055 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
6056 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
6057 /* [06] = */ { RT_STR_TUPLE("!6!") },
6058 /* [07] = */ { RT_STR_TUPLE("!7!") },
6059 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
6060 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
6061 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
6062 /* [0b] = */ { RT_STR_TUPLE("!b!") },
6063 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
6064 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
6065 /* [0e] = */ { RT_STR_TUPLE("!e!") },
6066 /* [0f] = */ { RT_STR_TUPLE("!f!") },
6067 /* [10] = */ { RT_STR_TUPLE("!10!") },
6068 /* [11] = */ { RT_STR_TUPLE("!11!") },
6069 /* [12] = */ { RT_STR_TUPLE("!12!") },
6070 /* [13] = */ { RT_STR_TUPLE("!13!") },
6071 /* [14] = */ { RT_STR_TUPLE("!14!") },
6072 /* [15] = */ { RT_STR_TUPLE("!15!") },
6073 /* [16] = */ { RT_STR_TUPLE("!16!") },
6074 /* [17] = */ { RT_STR_TUPLE("!17!") },
6075 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
6076 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
6077 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
6078 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
6079 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
6080 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
6081 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
6082 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
6083 };
6084 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
6085 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
6086 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
6087
6088 pszBuf[off++] = ' ';
6089 pszBuf[off++] = 'C';
6090 pszBuf[off++] = 'P';
6091 pszBuf[off++] = 'L';
6092 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
6093 Assert(off < 32);
6094
6095 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
6096
6097 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
6098 {
6099 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
6100 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
6101 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
6102 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
6103 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
6104 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
6105 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
6106 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
6107 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
6108 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
6109 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
6110 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
6111 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
6112 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
6113 };
6114 if (fFlags)
6115 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
6116 if (s_aFlags[i].fFlag & fFlags)
6117 {
6118 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
6119 pszBuf[off++] = ' ';
6120 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
6121 off += s_aFlags[i].cchName;
6122 fFlags &= ~s_aFlags[i].fFlag;
6123 if (!fFlags)
6124 break;
6125 }
6126 pszBuf[off] = '\0';
6127
6128 return pszBuf;
6129}
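
/*
 * Example output (illustrative): "32BIT_PROT_FLAT CPL0 CS_LIM_CHECKS TYPE_NATIVE"
 * for a flat 32-bit protected mode ring-0 TB with CS limit checks enabled.
 */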
6130
6131
6132DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
6133{
6134 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
6135
6136 char szDisBuf[512];
6137 DISSTATE Dis;
6138 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
6139 uint32_t const cNative = pTb->Native.cInstructions;
6140 uint32_t offNative = 0;
6141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6142 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
6143#endif
6144 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6145 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6146 : DISCPUMODE_64BIT;
6147#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6148 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
6149#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6150 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
6151#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6152# error "Port me"
6153#else
6154 csh hDisasm = ~(size_t)0;
6155# if defined(RT_ARCH_AMD64)
6156 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
6157# elif defined(RT_ARCH_ARM64)
6158 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
6159# else
6160# error "Port me"
6161# endif
6162 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
6163#endif
6164
6165 /*
6166 * Print TB info.
6167 */
6168 pHlp->pfnPrintf(pHlp,
6169 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
6170 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
6171 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
6172 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
6173#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6174 if (pDbgInfo && pDbgInfo->cEntries > 1)
6175 {
6176 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
6177
6178 /*
6179 * This disassembly is driven by the debug info which follows the native
6180 * code and indicates where the next guest instruction starts,
6181 * where labels are, and such things.
6182 */
6183 uint32_t idxThreadedCall = 0;
6184 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
6185 uint8_t idxRange = UINT8_MAX;
6186 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
6187 uint32_t offRange = 0;
6188 uint32_t offOpcodes = 0;
6189 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
6190 uint32_t const cDbgEntries = pDbgInfo->cEntries;
6191 uint32_t iDbgEntry = 1;
6192 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
6193
6194 while (offNative < cNative)
6195 {
6196 /* If we're at or have passed the point where the next chunk of debug
6197 info starts, process it. */
6198 if (offDbgNativeNext <= offNative)
6199 {
6200 offDbgNativeNext = UINT32_MAX;
6201 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
6202 {
6203 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
6204 {
6205 case kIemTbDbgEntryType_GuestInstruction:
6206 {
6207 /* Did the exec flag change? */
6208 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
6209 {
6210 pHlp->pfnPrintf(pHlp,
6211 " fExec change %#08x -> %#08x %s\n",
6212 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6213 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6214 szDisBuf, sizeof(szDisBuf)));
6215 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
6216 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6217 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6218 : DISCPUMODE_64BIT;
6219 }
6220
6221 /* New opcode range? We need to handle a spurious debug info entry here for cases
6222 where the compilation was aborted before the opcode was recorded and the actual
6223 instruction was translated to a threaded call. This may happen when we run out
6224 of ranges, or when some complicated interrupts/FFs are found to be pending or
6225 similar. So, we just deal with it here rather than in the compiler code as it
6226 is a lot simpler to do up here. */
6227 if ( idxRange == UINT8_MAX
6228 || idxRange >= cRanges
6229 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
6230 {
6231 idxRange += 1;
6232 if (idxRange < cRanges)
6233 offRange = 0;
6234 else
6235 continue;
6236 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
6237 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
6238 + (pTb->aRanges[idxRange].idxPhysPage == 0
6239 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6240 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
6241 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6242 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
6243 pTb->aRanges[idxRange].idxPhysPage);
6244 }
6245
6246 /* Disassemble the instruction. */
6247 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
6248 uint32_t cbInstr = 1;
6249 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6250 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
6251 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6252 if (RT_SUCCESS(rc))
6253 {
6254 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6255 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6256 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6257 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6258
6259 static unsigned const s_offMarker = 55;
6260 static char const s_szMarker[] = " ; <--- guest";
6261 if (cch < s_offMarker)
6262 {
6263 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
6264 cch = s_offMarker;
6265 }
6266 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
6267 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
6268
6269 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
6270 }
6271 else
6272 {
6273 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
6274 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
6275 cbInstr = 1;
6276 }
6277 GCPhysPc += cbInstr;
6278 offOpcodes += cbInstr;
6279 offRange += cbInstr;
6280 continue;
6281 }
6282
6283 case kIemTbDbgEntryType_ThreadedCall:
6284 pHlp->pfnPrintf(pHlp,
6285 " Call #%u to %s (%u args)%s\n",
6286 idxThreadedCall,
6287 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6288 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6289 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
6290 idxThreadedCall++;
6291 continue;
6292
6293 case kIemTbDbgEntryType_GuestRegShadowing:
6294 {
6295 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
6296 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
6297 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
6298 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
6299 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6300 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
6301 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
6302 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
6303 else
6304 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
6305 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
6306 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6307 continue;
6308 }
6309
6310 case kIemTbDbgEntryType_Label:
6311 {
6312 const char *pszName = "what_the_fudge";
6313 const char *pszComment = "";
6314 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
6315 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
6316 {
6317 case kIemNativeLabelType_Return:
6318 pszName = "Return";
6319 break;
6320 case kIemNativeLabelType_ReturnBreak:
6321 pszName = "ReturnBreak";
6322 break;
6323 case kIemNativeLabelType_ReturnWithFlags:
6324 pszName = "ReturnWithFlags";
6325 break;
6326 case kIemNativeLabelType_NonZeroRetOrPassUp:
6327 pszName = "NonZeroRetOrPassUp";
6328 break;
6329 case kIemNativeLabelType_RaiseGp0:
6330 pszName = "RaiseGp0";
6331 break;
6332 case kIemNativeLabelType_If:
6333 pszName = "If";
6334 fNumbered = true;
6335 break;
6336 case kIemNativeLabelType_Else:
6337 pszName = "Else";
6338 fNumbered = true;
6339 pszComment = " ; regs state restored pre-if-block";
6340 break;
6341 case kIemNativeLabelType_Endif:
6342 pszName = "Endif";
6343 fNumbered = true;
6344 break;
6345 case kIemNativeLabelType_CheckIrq:
6346 pszName = "CheckIrq_CheckVM";
6347 fNumbered = true;
6348 break;
6349 case kIemNativeLabelType_Invalid:
6350 case kIemNativeLabelType_End:
6351 break;
6352 }
6353 if (fNumbered)
6354 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
6355 else
6356 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
6357 continue;
6358 }
6359
6360 case kIemTbDbgEntryType_NativeOffset:
6361 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
6362 Assert(offDbgNativeNext > offNative);
6363 break;
6364
6365 default:
6366 AssertFailed();
6367 }
6368 iDbgEntry++;
6369 break;
6370 }
6371 }
6372
6373 /*
6374 * Disassemble the next native instruction.
6375 */
6376 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6377# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6378 uint32_t cbInstr = sizeof(paNative[0]);
6379 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6380 if (RT_SUCCESS(rc))
6381 {
6382# if defined(RT_ARCH_AMD64)
6383 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6384 {
6385 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6386 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6387 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
6388 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6389 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6390 uInfo & 0x8000 ? " - recompiled" : "");
6391 else
6392 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6393 }
6394 else
6395# endif
6396 {
6397# ifdef RT_ARCH_AMD64
6398 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6399 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6400 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6401 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6402# elif defined(RT_ARCH_ARM64)
6403 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6404 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6405 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6406# else
6407# error "Port me"
6408# endif
6409 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6410 }
6411 }
6412 else
6413 {
6414# if defined(RT_ARCH_AMD64)
6415 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6416 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6417# elif defined(RT_ARCH_ARM64)
6418 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6419# else
6420# error "Port me"
6421# endif
6422 cbInstr = sizeof(paNative[0]);
6423 }
6424 offNative += cbInstr / sizeof(paNative[0]);
6425
6426# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6427 cs_insn *pInstr;
6428 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6429 (uintptr_t)pNativeCur, 1, &pInstr);
6430 if (cInstrs > 0)
6431 {
6432 Assert(cInstrs == 1);
6433# if defined(RT_ARCH_AMD64)
6434 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6435 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6436# else
6437 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6438 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6439# endif
6440 offNative += pInstr->size / sizeof(*pNativeCur);
6441 cs_free(pInstr, cInstrs);
6442 }
6443 else
6444 {
6445# if defined(RT_ARCH_AMD64)
6446 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6447 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6448# else
6449 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6450# endif
6451 offNative++;
6452 }
6453# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6454 }
6455 }
6456 else
6457#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
6458 {
6459 /*
6460 * No debug info, just disassemble the x86 code and then the native code.
6461 *
6462 * First the guest code:
6463 */
6464 for (unsigned i = 0; i < pTb->cRanges; i++)
6465 {
6466 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
6467 + (pTb->aRanges[i].idxPhysPage == 0
6468 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6469 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
6470 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6471 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
6472 unsigned off = pTb->aRanges[i].offOpcodes;
6473 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
6474 while (off < cbOpcodes)
6475 {
6476 uint32_t cbInstr = 1;
6477 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6478 &pTb->pabOpcodes[off], cbOpcodes - off,
6479 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6480 if (RT_SUCCESS(rc))
6481 {
6482 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6483 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6484 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6485 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6486 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
6487 GCPhysPc += cbInstr;
6488 off += cbInstr;
6489 }
6490 else
6491 {
6492 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
6493 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
6494 break;
6495 }
6496 }
6497 }
6498
6499 /*
6500 * Then the native code:
6501 */
6502 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
6503 while (offNative < cNative)
6504 {
6505 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6506# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6507 uint32_t cbInstr = sizeof(paNative[0]);
6508 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6509 if (RT_SUCCESS(rc))
6510 {
6511# if defined(RT_ARCH_AMD64)
6512 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6513 {
6514 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6515 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6516 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
6517 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6518 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6519 uInfo & 0x8000 ? " - recompiled" : "");
6520 else
6521 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6522 }
6523 else
6524# endif
6525 {
6526# ifdef RT_ARCH_AMD64
6527 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6528 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6529 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6530 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6531# elif defined(RT_ARCH_ARM64)
6532 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6533 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6534 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6535# else
6536# error "Port me"
6537# endif
6538 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6539 }
6540 }
6541 else
6542 {
6543# if defined(RT_ARCH_AMD64)
6544 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6545 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6546# else
6547 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6548# endif
6549 cbInstr = sizeof(paNative[0]);
6550 }
6551 offNative += cbInstr / sizeof(paNative[0]);
6552
6553# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6554 cs_insn *pInstr;
6555 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6556 (uintptr_t)pNativeCur, 1, &pInstr);
6557 if (cInstrs > 0)
6558 {
6559 Assert(cInstrs == 1);
6560# if defined(RT_ARCH_AMD64)
6561 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6562 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6563# else
6564 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6565 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6566# endif
6567 offNative += pInstr->size / sizeof(*pNativeCur);
6568 cs_free(pInstr, cInstrs);
6569 }
6570 else
6571 {
6572# if defined(RT_ARCH_AMD64)
6573 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6574 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6575# else
6576 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6577# endif
6578 offNative++;
6579 }
6580# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6581 }
6582 }
6583
6584#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6585 /* Cleanup. */
6586 cs_close(&hDisasm);
6587#endif
6588}
6589
6590
6591/**
6592 * Recompiles the given threaded TB into a native one.
6593 *
6594 * In case of failure the translation block will be returned as-is.
6595 *
6596 * @returns pTb.
6597 * @param pVCpu The cross context virtual CPU structure of the calling
6598 * thread.
6599 * @param pTb The threaded translation to recompile to native.
6600 */
6601DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
6602{
6603 /*
6604 * The first time thru, we allocate the recompiler state, the other times
6605 * we just need to reset it before using it again.
6606 */
6607 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
6608 if (RT_LIKELY(pReNative))
6609 iemNativeReInit(pReNative, pTb);
6610 else
6611 {
6612 pReNative = iemNativeInit(pVCpu, pTb);
6613 AssertReturn(pReNative, pTb);
6614 }
6615
6616 /*
6617 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
6618 * for aborting if an error happens.
6619 */
6620 uint32_t cCallsLeft = pTb->Thrd.cCalls;
6621#ifdef LOG_ENABLED
6622 uint32_t const cCallsOrg = cCallsLeft;
6623#endif
6624 uint32_t off = 0;
6625 int rc = VINF_SUCCESS;
6626 IEMNATIVE_TRY_SETJMP(pReNative, rc)
6627 {
6628 /*
6629 * Emit prolog code (fixed).
6630 */
6631 off = iemNativeEmitProlog(pReNative, off);
6632
6633 /*
6634 * Convert the calls to native code.
6635 */
6636#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6637 int32_t iGstInstr = -1;
6638 uint32_t fExec = pTb->fFlags;
6639#endif
6640 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
6641 while (cCallsLeft-- > 0)
6642 {
6643 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
6644
6645 /*
6646 * Debug info and assembly markup.
6647 */
6648#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6649 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
6650 fExec = pCallEntry->auParams[0];
6651 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6652 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
6653 {
6654 if (iGstInstr < (int32_t)pTb->cInstructions)
6655 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
6656 else
6657 Assert(iGstInstr == pTb->cInstructions);
6658 iGstInstr = pCallEntry->idxInstr;
6659 }
6660 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
6661#endif
6662#if defined(VBOX_STRICT) && 1
6663 off = iemNativeEmitMarker(pReNative, off,
6664 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
6665 pCallEntry->enmFunction));
6666#endif
6667
6668 /*
6669 * Actual work.
6670 */
6671 if (pfnRecom) /** @todo stats on this. */
6672 {
6673 //STAM_COUNTER_INC()
6674 off = pfnRecom(pReNative, off, pCallEntry);
6675 }
6676 else
6677 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
6678 Assert(off <= pReNative->cInstrBufAlloc);
6679 Assert(pReNative->cCondDepth == 0);
6680
6681 /*
6682 * Advance.
6683 */
6684 pCallEntry++;
6685 }
6686
6687 /*
6688 * Emit the epilog code.
6689 */
6690 uint32_t idxReturnLabel;
6691 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
6692
6693 /*
6694 * Generate special jump labels.
6695 */
6696 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
6697 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
6698 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
6699 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
6700 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
6701 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
6702 }
6703 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
6704 {
6705 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
6706 return pTb;
6707 }
6708 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
6709 Assert(off <= pReNative->cInstrBufAlloc);
6710
6711 /*
6712 * Make sure all labels have been defined.
6713 */
6714 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
6715#ifdef VBOX_STRICT
6716 uint32_t const cLabels = pReNative->cLabels;
6717 for (uint32_t i = 0; i < cLabels; i++)
6718 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
6719#endif
6720
6721 /*
6722 * Allocate executable memory, copy over the code we've generated.
6723 */
6724 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
6725 if (pTbAllocator->pDelayedFreeHead)
6726 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
6727
6728 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
6729 AssertReturn(paFinalInstrBuf, pTb);
6730 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
6731
6732 /*
6733 * Apply fixups.
6734 */
6735 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
6736 uint32_t const cFixups = pReNative->cFixups;
6737 for (uint32_t i = 0; i < cFixups; i++)
6738 {
6739 Assert(paFixups[i].off < off);
6740 Assert(paFixups[i].idxLabel < cLabels);
6741 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
6742 switch (paFixups[i].enmType)
6743 {
6744#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6745 case kIemNativeFixupType_Rel32:
6746 Assert(paFixups[i].off + 4 <= off);
6747 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6748 continue;
6749
6750#elif defined(RT_ARCH_ARM64)
6751 case kIemNativeFixupType_RelImm26At0:
6752 {
6753 Assert(paFixups[i].off < off);
6754 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6755 Assert(offDisp >= -33554432 && offDisp < 33554432);
6756 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6757 continue;
6758 }
6759
6760 case kIemNativeFixupType_RelImm19At5:
6761 {
6762 Assert(paFixups[i].off < off);
6763 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6764 Assert(offDisp >= -262144 && offDisp < 262144);
6765 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6766 continue;
6767 }
6768
6769 case kIemNativeFixupType_RelImm14At5:
6770 {
6771 Assert(paFixups[i].off < off);
6772 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6773 Assert(offDisp >= -8192 && offDisp < 8192);
6774 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
6775 continue;
6776 }
6777
6778#endif
6779 case kIemNativeFixupType_Invalid:
6780 case kIemNativeFixupType_End:
6781 break;
6782 }
6783 AssertFailed();
6784 }
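    /*
     * Worked example (illustrative): an ARM64 kIemNativeFixupType_RelImm26At0
     * fixup at instruction offset 40 referring to a label at offset 100 with a
     * zero addend gives offDisp = 60; the branch instruction keeps its opcode
     * bits 31:26 and gets 60 written into its low 26 bits.
     */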
6785
6786 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
6787
6788 /*
6789 * Convert the translation block.
6790 */
6791 //RT_BREAKPOINT();
6792 RTMemFree(pTb->Thrd.paCalls);
6793 pTb->Native.paInstructions = paFinalInstrBuf;
6794 pTb->Native.cInstructions = off;
6795 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
6796#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6797 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
6798 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
6799#endif
6800
6801 Assert(pTbAllocator->cThreadedTbs > 0);
6802 pTbAllocator->cThreadedTbs -= 1;
6803 pTbAllocator->cNativeTbs += 1;
6804 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
6805
6806#ifdef LOG_ENABLED
6807 /*
6808 * Disassemble to the log if enabled.
6809 */
6810 if (LogIs3Enabled())
6811 {
6812 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
6813 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
6814 }
6815#endif
6816
6817 return pTb;
6818}
6819