1/* $Id: IEMAllN8veRecompiler.cpp 102022 2023-11-09 11:38:47Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128
129/*********************************************************************************************************************************
130* Defined Constants And Macros *
131*********************************************************************************************************************************/
132/** Always count instructions for now. */
133#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
134
135
136/*********************************************************************************************************************************
137* Internal Functions *
138*********************************************************************************************************************************/
139#ifdef VBOX_STRICT
140static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
141 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
142#endif
143#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
144static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
145static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
146#endif
147
148
149/*********************************************************************************************************************************
150* Executable Memory Allocator *
151*********************************************************************************************************************************/
152/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
153 * Use an alternative chunk sub-allocator that does not store internal data
154 * in the chunk.
155 *
156 * Using RTHeapSimple is not practical on newer darwin systems where
157 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
158 * memory. We would have to change the protection of the whole chunk for
159 * every call to RTHeapSimple, which would be rather expensive.
160 *
161 * This alternative implementation lets us restrict page protection modifications
162 * to the pages backing the executable memory we just allocated.
163 */
164#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
165/** The chunk sub-allocation unit size in bytes. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
167/** The chunk sub-allocation unit size as a shift factor. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
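/* Illustrative sanity checks (added for clarity, not in the original file): the shift
   factor must match the unit size, and a request is rounded up to whole units, e.g. a
   300 byte request consumes 3 units (384 bytes) of chunk space. */
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));
AssertCompile(((300 + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) == 3);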
169
170#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
171# ifdef IEMNATIVE_USE_GDB_JIT
172# define IEMNATIVE_USE_GDB_JIT_ET_DYN
173
174/** GDB JIT: Code entry. */
175typedef struct GDBJITCODEENTRY
176{
177 struct GDBJITCODEENTRY *pNext;
178 struct GDBJITCODEENTRY *pPrev;
179 uint8_t *pbSymFile;
180 uint64_t cbSymFile;
181} GDBJITCODEENTRY;
182
183/** GDB JIT: Actions. */
184typedef enum GDBJITACTIONS : uint32_t
185{
186 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
187} GDBJITACTIONS;
188
189/** GDB JIT: Descriptor. */
190typedef struct GDBJITDESCRIPTOR
191{
192 uint32_t uVersion;
193 GDBJITACTIONS enmAction;
194 GDBJITCODEENTRY *pRelevant;
195 GDBJITCODEENTRY *pHead;
196 /** Our addition: */
197 GDBJITCODEENTRY *pTail;
198} GDBJITDESCRIPTOR;
199
200/** GDB JIT: Our simple symbol file data. */
201typedef struct GDBJITSYMFILE
202{
203 Elf64_Ehdr EHdr;
204# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
205 Elf64_Shdr aShdrs[5];
206# else
207 Elf64_Shdr aShdrs[7];
208 Elf64_Phdr aPhdrs[2];
209# endif
210 /** The dwarf ehframe data for the chunk. */
211 uint8_t abEhFrame[512];
212 char szzStrTab[128];
213 Elf64_Sym aSymbols[3];
214# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
215 Elf64_Sym aDynSyms[2];
216 Elf64_Dyn aDyn[6];
217# endif
218} GDBJITSYMFILE;
219
220extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
221extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
222
223/** Init once for g_IemNativeGdbJitLock. */
224static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
225/** Critical section protecting the GDB JIT descriptor list. */
226static RTCRITSECT g_IemNativeGdbJitLock;
227
228/** GDB reads the info here. */
229GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
230
231/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
232DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
233{
234 ASMNopPause();
235}
236
237/** @callback_method_impl{FNRTONCE} */
238static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
239{
240 RT_NOREF(pvUser);
241 return RTCritSectInit(&g_IemNativeGdbJitLock);
242}
243
244
245# endif /* IEMNATIVE_USE_GDB_JIT */
246
247/**
248 * Per-chunk unwind info for non-windows hosts.
249 */
250typedef struct IEMEXECMEMCHUNKEHFRAME
251{
252# ifdef IEMNATIVE_USE_LIBUNWIND
253 /** The offset of the FDA into abEhFrame. */
254 uintptr_t offFda;
255# else
256 /** 'struct object' storage area. */
257 uint8_t abObject[1024];
258# endif
259# ifdef IEMNATIVE_USE_GDB_JIT
260# if 0
261 /** The GDB JIT 'symbol file' data. */
262 GDBJITSYMFILE GdbJitSymFile;
263# endif
264 /** The GDB JIT list entry. */
265 GDBJITCODEENTRY GdbJitEntry;
266# endif
267 /** The dwarf ehframe data for the chunk. */
268 uint8_t abEhFrame[512];
269} IEMEXECMEMCHUNKEHFRAME;
270/** Pointer to per-chunk unwind info for non-windows hosts. */
271typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
272#endif
273
274
275/**
276 * A chunk of executable memory.
277 */
278typedef struct IEMEXECMEMCHUNK
279{
280#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
281 /** Number of free items in this chunk. */
282 uint32_t cFreeUnits;
283 /** Hint where to start searching for free space in the allocation bitmap. */
284 uint32_t idxFreeHint;
285#else
286 /** The heap handle. */
287 RTHEAPSIMPLE hHeap;
288#endif
289 /** Pointer to the chunk. */
290 void *pvChunk;
291#ifdef IN_RING3
292 /**
293 * Pointer to the unwind information.
294 *
295 * This is used during C++ throw and longjmp (windows and probably most other
296 * platforms). Some debuggers (windbg) make use of it as well.
297 *
298 * Windows: This is allocated from hHeap on windows because (at least for
299 * AMD64) the UNWIND_INFO structure address in the
300 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
301 *
302 * Others: Allocated from the regular heap to avoid unnecessary executable data
303 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
304 void *pvUnwindInfo;
305#elif defined(IN_RING0)
306 /** Allocation handle. */
307 RTR0MEMOBJ hMemObj;
308#endif
309} IEMEXECMEMCHUNK;
310/** Pointer to a memory chunk. */
311typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
312
313
314/**
315 * Executable memory allocator for the native recompiler.
316 */
317typedef struct IEMEXECMEMALLOCATOR
318{
319 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
320 uint32_t uMagic;
321
322 /** The chunk size. */
323 uint32_t cbChunk;
324 /** The maximum number of chunks. */
325 uint32_t cMaxChunks;
326 /** The current number of chunks. */
327 uint32_t cChunks;
328 /** Hint where to start looking for available memory. */
329 uint32_t idxChunkHint;
330 /** Statistics: Current number of allocations. */
331 uint32_t cAllocations;
332
333 /** The total amount of memory available. */
334 uint64_t cbTotal;
335 /** Total amount of free memory. */
336 uint64_t cbFree;
337 /** Total amount of memory allocated. */
338 uint64_t cbAllocated;
339
340#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
341 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
342 *
343 * Since the chunk size is a power of two and the minimum chunk size is a lot
344 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
345 * require a whole number of uint64_t elements in the allocation bitmap. So,
346 * for the sake of simplicity (read: laziness), they are allocated as one
347 * continuous block. */
348 uint64_t *pbmAlloc;
349 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
350 uint32_t cUnitsPerChunk;
351 /** Number of bitmap elements per chunk (for quickly locating the bitmap
352 * portion corresponding to a chunk). */
353 uint32_t cBitmapElementsPerChunk;
354#else
355 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
356 * @{ */
357 /** The size of the heap internal block header. This is used to adjust the
358 * requested memory size to make sure there is exactly enough room for a header at
359 * the end of the blocks we allocate before the next 64 byte alignment line. */
360 uint32_t cbHeapBlockHdr;
361 /** The size of the initial heap allocation required to make sure the first
362 * allocation is correctly aligned. */
363 uint32_t cbHeapAlignTweak;
364 /** The alignment tweak allocation address. */
365 void *pvAlignTweak;
366 /** @} */
367#endif
368
369#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
370 /** Pointer to the array of unwind info running parallel to aChunks (same
371 * allocation as this structure, located after the bitmaps).
372 * (For Windows, the structures must reside within 32-bit RVA distance of the
373 * actual chunk, so they are allocated off the chunk.) */
374 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
375#endif
376
377 /** The allocation chunks. */
378 RT_FLEXIBLE_ARRAY_EXTENSION
379 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
380} IEMEXECMEMALLOCATOR;
381/** Pointer to an executable memory allocator. */
382typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
383
384/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
385#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
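#if 0 /* Illustrative sketch (hypothetical helper, not part of the original file): how the
       * per-chunk slice of the shared allocation bitmap is located.  For a 64 MiB chunk with
       * 128 byte units that slice covers 524288 units = 8192 uint64_t = 64 KiB of bitmap. */
DECLINLINE(uint64_t *) iemExecMemAllocatorChunkBitmapSketch(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk)
{
    Assert(idxChunk < pExecMemAllocator->cChunks);
    return &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
}
#endif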
386
387
388static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
389
390
391/**
392 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
393 * the heap statistics.
394 */
395static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
396 uint32_t cbReq, uint32_t idxChunk)
397{
398 pExecMemAllocator->cAllocations += 1;
399 pExecMemAllocator->cbAllocated += cbReq;
400#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
401 pExecMemAllocator->cbFree -= cbReq;
402#else
403 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
404#endif
405 pExecMemAllocator->idxChunkHint = idxChunk;
406
407#ifdef RT_OS_DARWIN
408 /*
409 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
410 * on darwin. So, we mark the pages returned as read+write after alloc and
411 * expect the caller to call iemExecMemAllocatorReadyForUse when done
412 * writing to the allocation.
413 *
414 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
415 * for details.
416 */
417 /** @todo detect if this is necessary... it wasn't required on 10.15 or
418 * whatever older version it was. */
419 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
420 AssertRC(rc);
421#endif
422
423 return pvRet;
424}
425
426
427#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
428static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
429 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
430{
431 /*
432 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
433 */
434 Assert(!(cToScan & 63));
435 Assert(!(idxFirst & 63));
436 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
437 pbmAlloc += idxFirst / 64;
438
439 /*
440 * Scan the bitmap for cReqUnits consecutive clear bits
441 */
442 /** @todo This can probably be done more efficiently for non-x86 systems. */
443 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
444 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
445 {
446 uint32_t idxAddBit = 1;
447 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
448 idxAddBit++;
449 if (idxAddBit >= cReqUnits)
450 {
451 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
452
453 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
454 pChunk->cFreeUnits -= cReqUnits;
455 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
456
457 void * const pvRet = (uint8_t *)pChunk->pvChunk
458 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
459
460 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
461 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
462 }
463
464 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
465 }
466 return NULL;
467}
468#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
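#if 0 /* Illustrative sketch (hypothetical test function, not part of the original file): the
       * first-fit scan above applied to a toy single-qword bitmap.  Bits 0..2 and bit 5 are
       * taken, so the first run of three clear bits starts at bit 6 (bits 3..4 only give two). */
static void iemExecMemAllocatorScanSketch(void)
{
    uint64_t bmToy = UINT64_C(0x27);                /* 0010 0111 - units 0,1,2,5 already allocated */
    int      iBit  = ASMBitFirstClear(&bmToy, 64);  /* -> 3 */
    Assert(iBit == 3);
    Assert(ASMBitTest(&bmToy, 5));                  /* the run at 3..4 is cut short by bit 5 */
    Assert(!ASMBitTest(&bmToy, 6) && !ASMBitTest(&bmToy, 7) && !ASMBitTest(&bmToy, 8));
    ASMBitSetRange(&bmToy, 6, 6 + 3);               /* claim units 6,7,8 */
    Assert(bmToy == UINT64_C(0x1e7));               /* 0001 1110 0111 */
}
#endif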
469
470
471static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
472{
473#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
474 /*
475 * Figure out how much to allocate.
476 */
477 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
478 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
479 {
480 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
481 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
482 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
483 {
484 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
485 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
486 if (pvRet)
487 return pvRet;
488 }
489 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
490 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
491 cReqUnits, idxChunk);
492 }
493#else
494 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
495 if (pvRet)
496 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
497#endif
498 return NULL;
499
500}
501
502
503/**
504 * Allocates @a cbReq bytes of executable memory.
505 *
506 * @returns Pointer to the memory, NULL if out of memory or other problem
507 * encountered.
508 * @param pVCpu The cross context virtual CPU structure of the calling
509 * thread.
510 * @param cbReq How many bytes are required.
511 */
512static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
513{
514 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
515 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
516 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
517
518 /*
519 * Adjust the request size so it'll fit the allocator alignment/whatnot.
520 *
521 * For the RTHeapSimple allocator this means to follow the logic described
522 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
523 * existing chunks if we think we've got sufficient free memory around.
524 *
525 * While for the alternative one we just align it up to a whole unit size.
526 */
527#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
528 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
529#else
530 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
531#endif
532 if (cbReq <= pExecMemAllocator->cbFree)
533 {
534 uint32_t const cChunks = pExecMemAllocator->cChunks;
535 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
536 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
537 {
538 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
539 if (pvRet)
540 return pvRet;
541 }
542 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
543 {
544 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
545 if (pvRet)
546 return pvRet;
547 }
548 }
549
550 /*
551 * Can we grow it with another chunk?
552 */
553 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
554 {
555 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
556 AssertLogRelRCReturn(rc, NULL);
557
558 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
559 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
560 if (pvRet)
561 return pvRet;
562 AssertFailed();
563 }
564
565 /* What now? Prune native translation blocks from the cache? */
566 AssertFailed();
567 return NULL;
568}
569
570
571/** This is a hook that we may need later for changing memory protection back
572 * to readonly+exec */
573static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
574{
575#ifdef RT_OS_DARWIN
576 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
577 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
578 AssertRC(rc); RT_NOREF(pVCpu);
579
580 /*
581 * Flush the instruction cache:
582 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
583 */
584 /* sys_dcache_flush(pv, cb); - not necessary */
585 sys_icache_invalidate(pv, cb);
586#else
587 RT_NOREF(pVCpu, pv, cb);
588#endif
589}
590
591
592/**
593 * Frees executable memory.
594 */
595void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
596{
597 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
598 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
599 Assert(pv);
600#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
601 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
602#else
603 Assert(!((uintptr_t)pv & 63));
604#endif
605
606 /* Align the size as we did when allocating the block. */
607#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
608 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
609#else
610 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
611#endif
612
613 /* Free it / assert sanity. */
614#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
615 uint32_t const cChunks = pExecMemAllocator->cChunks;
616 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
617 bool fFound = false;
618 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
619 {
620 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
621 fFound = offChunk < cbChunk;
622 if (fFound)
623 {
624#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
625 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
626 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
627
628 /* Check that it's valid and free it. */
629 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
630 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
631 for (uint32_t i = 1; i < cReqUnits; i++)
632 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
633 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
634
635 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
636 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
637
638 /* Update the stats. */
639 pExecMemAllocator->cbAllocated -= cb;
640 pExecMemAllocator->cbFree += cb;
641 pExecMemAllocator->cAllocations -= 1;
642 return;
643#else
644 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
645 break;
646#endif
647 }
648 }
649# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
650 AssertFailed();
651# else
652 Assert(fFound);
653# endif
654#endif
655
656#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
657 /* Update stats while cb is freshly calculated. */
658 pExecMemAllocator->cbAllocated -= cb;
659 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
660 pExecMemAllocator->cAllocations -= 1;
661
662 /* Free it. */
663 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
664#endif
665}
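#if 0 /* Illustrative allocation lifecycle sketch (hypothetical function, not part of the
       * original file).  On darwin the allocation comes back read+write (not executable); the
       * caller emits code into it and then flips it to read+exec via
       * iemExecMemAllocatorReadyForUse before running it. */
static void iemExecMemAllocatorLifecycleSketch(PVMCPUCC pVCpu)
{
    uint32_t const cbCode = 256;
    uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pbCode)
    {
        memset(pbCode, 0xcc, cbCode);                           /* "emit" something (int3 padding). */
        iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbCode);  /* RW -> RX + icache flush on darwin. */
        /* ... execute the translation block ... */
        iemExecMemAllocatorFree(pVCpu, pbCode, cbCode);         /* return it to the chunk sub-allocator. */
    }
}
#endif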
666
667
668
669#ifdef IN_RING3
670# ifdef RT_OS_WINDOWS
671
672/**
673 * Initializes the unwind info structures for windows hosts.
674 */
675static int
676iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
677 void *pvChunk, uint32_t idxChunk)
678{
679 RT_NOREF(pVCpu);
680
681 /*
682 * The AMD64 unwind opcodes.
683 *
684 * This is a program that starts with RSP after a RET instruction that
685 * ends up in recompiled code, and the operations we describe here will
686 * restore all non-volatile registers and bring RSP back to where our
687 * RET address is. This means it's reverse order from what happens in
688 * the prologue.
689 *
690 * Note! Using a frame register approach here, both because we have one,
691 * but mainly because the UWOP_ALLOC_LARGE argument values
692 * would be a pain to write initializers for. On the positive
693 * side, we're impervious to changes in the stack variable
694 * area and can deal with dynamic stack allocations if necessary.
695 */
696 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
697 {
698 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
699 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
700 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
701 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
702 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
703 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
704 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
705 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
706 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
707 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
708 };
709 union
710 {
711 IMAGE_UNWIND_INFO Info;
712 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
713 } s_UnwindInfo =
714 {
715 {
716 /* .Version = */ 1,
717 /* .Flags = */ 0,
718 /* .SizeOfProlog = */ 16, /* whatever */
719 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
720 /* .FrameRegister = */ X86_GREG_xBP,
721 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
722 }
723 };
724 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
725 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
726
727 /*
728 * Calc how much space we need and allocate it off the exec heap.
729 */
730 unsigned const cFunctionEntries = 1;
731 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
732 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
733# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
734 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
735 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
736 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
737# else
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
739 - pExecMemAllocator->cbHeapBlockHdr;
740 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
741 32 /*cbAlignment*/);
742# endif
743 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
744 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
745
746 /*
747 * Initialize the structures.
748 */
749 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
750
751 paFunctions[0].BeginAddress = 0;
752 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
753 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
754
755 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
756 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
757
758 /*
759 * Register it.
760 */
761 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
762 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
763
764 return VINF_SUCCESS;
765}
766
767
768# else /* !RT_OS_WINDOWS */
769
770/**
771 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
772 */
773DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
774{
775 if (iValue >= 64)
776 {
777 Assert(iValue < 0x2000);
778 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
779 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
780 }
781 else if (iValue >= 0)
782 *Ptr.pb++ = (uint8_t)iValue;
783 else if (iValue > -64)
784 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
785 else
786 {
787 Assert(iValue > -0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
790 }
791 return Ptr;
792}
793
794
795/**
796 * Emits a ULEB128 encoded value (up to 64-bit wide).
797 */
798DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
799{
800 while (uValue >= 0x80)
801 {
802 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
803 uValue >>= 7;
804 }
805 *Ptr.pb++ = (uint8_t)uValue;
806 return Ptr;
807}
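# if 0 /* Illustrative encoding sketch (hypothetical function, not part of the original file):
        * a few concrete byte sequences produced by the two emitters above.  The signed variant
        * only handles the small range needed by the CIE/FDE generation further down. */
static void iemDwarfLeb128Sketch(void)
{
    uint8_t    abBuf[4];
    RTPTRUNION Ptr = { abBuf };

    Ptr = iemDwarfPutLeb128(Ptr, -8);       /* data alignment factor -> 0x78      */
    Assert(abBuf[0] == 0x78);

    Ptr.pb = abBuf;
    Ptr = iemDwarfPutUleb128(Ptr, 300);     /* multi-byte value      -> 0xac 0x02 */
    Assert(abBuf[0] == 0xac && abBuf[1] == 0x02);
    RT_NOREF(Ptr);
}
# endif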
808
809
810/**
811 * Emits a CFA rule as register @a uReg + offset @a off.
812 */
813DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
814{
815 *Ptr.pb++ = DW_CFA_def_cfa;
816 Ptr = iemDwarfPutUleb128(Ptr, uReg);
817 Ptr = iemDwarfPutUleb128(Ptr, off);
818 return Ptr;
819}
820
821
822/**
823 * Emits a register (@a uReg) save location:
824 * CFA + @a off * data_alignment_factor
825 */
826DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
827{
828 if (uReg < 0x40)
829 *Ptr.pb++ = DW_CFA_offset | uReg;
830 else
831 {
832 *Ptr.pb++ = DW_CFA_offset_extended;
833 Ptr = iemDwarfPutUleb128(Ptr, uReg);
834 }
835 Ptr = iemDwarfPutUleb128(Ptr, off);
836 return Ptr;
837}
838
839
840# if 0 /* unused */
841/**
842 * Emits a register (@a uReg) save location, using signed offset:
843 * CFA + @a offSigned * data_alignment_factor
844 */
845DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
846{
847 *Ptr.pb++ = DW_CFA_offset_extended_sf;
848 Ptr = iemDwarfPutUleb128(Ptr, uReg);
849 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
850 return Ptr;
851}
852# endif
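# if 0 /* Illustrative sketch (hypothetical function, not part of the original file): the raw
        * bytes the CFA helpers above produce for the AMD64 rules used in the CIE further down,
        * assuming the usual System V DWARF register numbers (RBP=6, return address column=16). */
static void iemDwarfCfaSketch(void)
{
    uint8_t    abBuf[8];
    RTPTRUNION Ptr = { abBuf };

    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* DW_CFA_def_cfa:     0x0c 0x06 0x10 */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1);     /* DW_CFA_offset|16,1: 0x90 0x01      */
    Assert(Ptr.pb == &abBuf[5]);
    RT_NOREF(Ptr);
}
# endif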
853
854
855/**
856 * Initializes the unwind info section for non-windows hosts.
857 */
858static int
859iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
860 void *pvChunk, uint32_t idxChunk)
861{
862 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
863 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
864
865 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
866
867 /*
868 * Generate the CIE first.
869 */
870# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
871 uint8_t const iDwarfVer = 3;
872# else
873 uint8_t const iDwarfVer = 4;
874# endif
875 RTPTRUNION const PtrCie = Ptr;
876 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
877 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
878 *Ptr.pb++ = iDwarfVer; /* DWARF version */
879 *Ptr.pb++ = 0; /* Augmentation. */
880 if (iDwarfVer >= 4)
881 {
882 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
883 *Ptr.pb++ = 0; /* Segment selector size. */
884 }
885# ifdef RT_ARCH_AMD64
886 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
887# else
888 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
889# endif
890 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
891# ifdef RT_ARCH_AMD64
892 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
893# elif defined(RT_ARCH_ARM64)
894 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
895# else
896# error "port me"
897# endif
898 /* Initial instructions: */
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
908# elif defined(RT_ARCH_ARM64)
909# if 1
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
911# else
912 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
913# endif
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
926 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
927 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
928# else
929# error "port me"
930# endif
931 while ((Ptr.u - PtrCie.u) & 3)
932 *Ptr.pb++ = DW_CFA_nop;
933 /* Finalize the CIE size. */
934 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
935
936 /*
937 * Generate an FDE for the whole chunk area.
938 */
939# ifdef IEMNATIVE_USE_LIBUNWIND
940 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
941# endif
942 RTPTRUNION const PtrFde = Ptr;
943 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
944 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
945 Ptr.pu32++;
946 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
947 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
948# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
949 *Ptr.pb++ = DW_CFA_nop;
950# endif
951 while ((Ptr.u - PtrFde.u) & 3)
952 *Ptr.pb++ = DW_CFA_nop;
953 /* Finalize the FDE size. */
954 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
955
956 /* Terminator entry. */
957 *Ptr.pu32++ = 0;
958 *Ptr.pu32++ = 0; /* just to be sure... */
959 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
960
961 /*
962 * Register it.
963 */
964# ifdef IEMNATIVE_USE_LIBUNWIND
965 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
966# else
967 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
968 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
969# endif
970
971# ifdef IEMNATIVE_USE_GDB_JIT
972 /*
973 * Now for telling GDB about this (experimental).
974 *
975 * This seems to work best with ET_DYN.
976 */
977 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
978# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
979 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
980 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
981# else
982 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
983 - pExecMemAllocator->cbHeapBlockHdr;
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
985# endif
986 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
987 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
988
989 RT_ZERO(*pSymFile);
990
991 /*
992 * The ELF header:
993 */
994 pSymFile->EHdr.e_ident[0] = ELFMAG0;
995 pSymFile->EHdr.e_ident[1] = ELFMAG1;
996 pSymFile->EHdr.e_ident[2] = ELFMAG2;
997 pSymFile->EHdr.e_ident[3] = ELFMAG3;
998 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
999 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1000 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1001 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1002# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1003 pSymFile->EHdr.e_type = ET_DYN;
1004# else
1005 pSymFile->EHdr.e_type = ET_REL;
1006# endif
1007# ifdef RT_ARCH_AMD64
1008 pSymFile->EHdr.e_machine = EM_AMD64;
1009# elif defined(RT_ARCH_ARM64)
1010 pSymFile->EHdr.e_machine = EM_AARCH64;
1011# else
1012# error "port me"
1013# endif
1014 pSymFile->EHdr.e_version = 1; /*?*/
1015 pSymFile->EHdr.e_entry = 0;
1016# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1017 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1018# else
1019 pSymFile->EHdr.e_phoff = 0;
1020# endif
1021 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1022 pSymFile->EHdr.e_flags = 0;
1023 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1024# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1025 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1026 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1027# else
1028 pSymFile->EHdr.e_phentsize = 0;
1029 pSymFile->EHdr.e_phnum = 0;
1030# endif
1031 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1032 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1033 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1034
1035 uint32_t offStrTab = 0;
1036#define APPEND_STR(a_szStr) do { \
1037 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1038 offStrTab += sizeof(a_szStr); \
1039 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1040 } while (0)
1041#define APPEND_STR_FMT(a_szStr, ...) do { \
1042 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1043 offStrTab++; \
1044 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1045 } while (0)
1046
1047 /*
1048 * Section headers.
1049 */
1050 /* Section header #0: NULL */
1051 unsigned i = 0;
1052 APPEND_STR("");
1053 RT_ZERO(pSymFile->aShdrs[i]);
1054 i++;
1055
1056 /* Section header: .eh_frame */
1057 pSymFile->aShdrs[i].sh_name = offStrTab;
1058 APPEND_STR(".eh_frame");
1059 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1060 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1061# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1062 pSymFile->aShdrs[i].sh_offset
1063 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1064# else
1065 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1066 pSymFile->aShdrs[i].sh_offset = 0;
1067# endif
1068
1069 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1070 pSymFile->aShdrs[i].sh_link = 0;
1071 pSymFile->aShdrs[i].sh_info = 0;
1072 pSymFile->aShdrs[i].sh_addralign = 1;
1073 pSymFile->aShdrs[i].sh_entsize = 0;
1074 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1075 i++;
1076
1077 /* Section header: .shstrtab */
1078 unsigned const iShStrTab = i;
1079 pSymFile->EHdr.e_shstrndx = iShStrTab;
1080 pSymFile->aShdrs[i].sh_name = offStrTab;
1081 APPEND_STR(".shstrtab");
1082 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1083 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1084# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1085 pSymFile->aShdrs[i].sh_offset
1086 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1087# else
1088 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1089 pSymFile->aShdrs[i].sh_offset = 0;
1090# endif
1091 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1092 pSymFile->aShdrs[i].sh_link = 0;
1093 pSymFile->aShdrs[i].sh_info = 0;
1094 pSymFile->aShdrs[i].sh_addralign = 1;
1095 pSymFile->aShdrs[i].sh_entsize = 0;
1096 i++;
1097
1098 /* Section header: .symtab */
1099 pSymFile->aShdrs[i].sh_name = offStrTab;
1100 APPEND_STR(".symtab");
1101 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1102 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1103 pSymFile->aShdrs[i].sh_offset
1104 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1105 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1106 pSymFile->aShdrs[i].sh_link = iShStrTab;
1107 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1108 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1109 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1110 i++;
1111
1112# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1113 /* Section header: .dynsym */
1114 pSymFile->aShdrs[i].sh_name = offStrTab;
1115 APPEND_STR(".dynsym");
1116 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1117 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1118 pSymFile->aShdrs[i].sh_offset
1119 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1120 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1121 pSymFile->aShdrs[i].sh_link = iShStrTab;
1122 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1123 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1124 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1125 i++;
1126# endif
1127
1128# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1129 /* Section header: .dynamic */
1130 pSymFile->aShdrs[i].sh_name = offStrTab;
1131 APPEND_STR(".dynamic");
1132 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1133 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1134 pSymFile->aShdrs[i].sh_offset
1135 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1136 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1137 pSymFile->aShdrs[i].sh_link = iShStrTab;
1138 pSymFile->aShdrs[i].sh_info = 0;
1139 pSymFile->aShdrs[i].sh_addralign = 1;
1140 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1141 i++;
1142# endif
1143
1144 /* Section header: .text */
1145 unsigned const iShText = i;
1146 pSymFile->aShdrs[i].sh_name = offStrTab;
1147 APPEND_STR(".text");
1148 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1149 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1150# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1151 pSymFile->aShdrs[i].sh_offset
1152 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1153# else
1154 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1155 pSymFile->aShdrs[i].sh_offset = 0;
1156# endif
1157 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1158 pSymFile->aShdrs[i].sh_link = 0;
1159 pSymFile->aShdrs[i].sh_info = 0;
1160 pSymFile->aShdrs[i].sh_addralign = 1;
1161 pSymFile->aShdrs[i].sh_entsize = 0;
1162 i++;
1163
1164 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1165
1166# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1167 /*
1168 * The program headers:
1169 */
1170 /* Everything in a single LOAD segment: */
1171 i = 0;
1172 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1173 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1174 pSymFile->aPhdrs[i].p_offset
1175 = pSymFile->aPhdrs[i].p_vaddr
1176 = pSymFile->aPhdrs[i].p_paddr = 0;
1177 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1178 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1179 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1180 i++;
1181 /* The .dynamic segment. */
1182 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1183 pSymFile->aPhdrs[i].p_flags = PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1189 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1190 i++;
1191
1192 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1193
1194 /*
1195 * The dynamic section:
1196 */
1197 i = 0;
1198 pSymFile->aDyn[i].d_tag = DT_SONAME;
1199 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1200 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1201 i++;
1202 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1203 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1204 i++;
1205 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1206 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1207 i++;
1208 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1209 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1210 i++;
1211 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1212 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1213 i++;
1214 pSymFile->aDyn[i].d_tag = DT_NULL;
1215 i++;
1216 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1217# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1218
1219 /*
1220 * Symbol tables:
1221 */
1222 /** @todo gdb doesn't seem to really like this ... */
1223 i = 0;
1224 pSymFile->aSymbols[i].st_name = 0;
1225 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1226 pSymFile->aSymbols[i].st_value = 0;
1227 pSymFile->aSymbols[i].st_size = 0;
1228 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1229 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1230# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1231 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1232# endif
1233 i++;
1234
1235 pSymFile->aSymbols[i].st_name = 0;
1236 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1237 pSymFile->aSymbols[i].st_value = 0;
1238 pSymFile->aSymbols[i].st_size = 0;
1239 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1240 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1241 i++;
1242
1243 pSymFile->aSymbols[i].st_name = offStrTab;
1244 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1245# if 0
1246 pSymFile->aSymbols[i].st_shndx = iShText;
1247 pSymFile->aSymbols[i].st_value = 0;
1248# else
1249 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1250 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1251# endif
1252 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1253 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1254 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1255# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1256 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1257 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1258# endif
1259 i++;
1260
1261 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1262 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1263
1264 /*
1265 * The GDB JIT entry and informing GDB.
1266 */
1267 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1268# if 1
1269 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1270# else
1271 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1272# endif
1273
1274 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1275 RTCritSectEnter(&g_IemNativeGdbJitLock);
1276 pEhFrame->GdbJitEntry.pNext = NULL;
1277 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1278 if (__jit_debug_descriptor.pTail)
1279 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1280 else
1281 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1282 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1283 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1284
1285 /* Notify GDB: */
1286 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1287 __jit_debug_register_code();
1288 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1289 RTCritSectLeave(&g_IemNativeGdbJitLock);
1290
1291# else /* !IEMNATIVE_USE_GDB_JIT */
1292 RT_NOREF(pVCpu);
1293# endif /* !IEMNATIVE_USE_GDB_JIT */
1294
1295 return VINF_SUCCESS;
1296}
1297
1298# endif /* !RT_OS_WINDOWS */
1299#endif /* IN_RING3 */
1300
1301
1302/**
1303 * Adds another chunk to the executable memory allocator.
1304 *
1305 * This is used by the init code for the initial allocation and later by the
1306 * regular allocator function when it's out of memory.
1307 */
1308static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1309{
1310 /* Check that we've room for growth. */
1311 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1312 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1313
1314 /* Allocate a chunk. */
1315#ifdef RT_OS_DARWIN
1316 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1317#else
1318 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1319#endif
1320 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1321
1322#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1323 int rc = VINF_SUCCESS;
1324#else
1325 /* Initialize the heap for the chunk. */
1326 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1327 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1328 AssertRC(rc);
1329 if (RT_SUCCESS(rc))
1330 {
1331 /*
1332 * We want the memory to be aligned on 64 byte, so the first time thru
1333 * here we do some exploratory allocations to see how we can achieve this.
1334 * On subsequent runs we only make an initial adjustment allocation, if
1335 * necessary.
1336 *
1337 * Since we own the heap implementation, we know that the internal block
1338 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1339 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1340 * to the size, align up by 64 bytes, and subtract 32 bytes.
1341 *
1342 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1343 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1344 * allocation to force subsequent allocations to return 64 byte aligned
1345 * user areas.
1346 */
1347 if (!pExecMemAllocator->cbHeapBlockHdr)
1348 {
1349 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1350 pExecMemAllocator->cbHeapAlignTweak = 64;
1351 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1352 32 /*cbAlignment*/);
1353 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1354
1355 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1356 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1357 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1358 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1359 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1360
1361 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1362 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1363 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1364 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1365 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1366
1367 RTHeapSimpleFree(hHeap, pvTest2);
1368 RTHeapSimpleFree(hHeap, pvTest1);
1369 }
1370 else
1371 {
1372 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1373 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1374 }
1375 if (RT_SUCCESS(rc))
1376#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1377 {
1378 /*
1379 * Add the chunk.
1380 *
1381 * This must be done before the unwind init so windows can allocate
1382 * memory from the chunk when using the alternative sub-allocator.
1383 */
1384 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1385#ifdef IN_RING3
1386 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1387#endif
1388#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1389 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1390#else
1391 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1392 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1393 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1394 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1395#endif
1396
1397 pExecMemAllocator->cChunks = idxChunk + 1;
1398 pExecMemAllocator->idxChunkHint = idxChunk;
1399
1400#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1401 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1402 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1403#else
1404 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1405 pExecMemAllocator->cbTotal += cbFree;
1406 pExecMemAllocator->cbFree += cbFree;
1407#endif
1408
1409#ifdef IN_RING3
1410 /*
1411 * Initialize the unwind information (this cannot really fail atm).
1412 * (This sets pvUnwindInfo.)
1413 */
1414 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1415 if (RT_SUCCESS(rc))
1416#endif
1417 {
1418 return VINF_SUCCESS;
1419 }
1420
1421#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1422 /* Just in case the impossible happens, undo the above: */
1423 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1424 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1425 pExecMemAllocator->cChunks = idxChunk;
1426 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1427 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1428 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1429 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1430#endif
1431 }
1432#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1433 }
1434#endif
1435 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1436 RT_NOREF(pVCpu);
1437 return rc;
1438}
1439
1440
1441/**
1442 * Initializes the executable memory allocator for native recompilation on the
1443 * calling EMT.
1444 *
1445 * @returns VBox status code.
1446 * @param pVCpu The cross context virtual CPU structure of the calling
1447 * thread.
1448 * @param cbMax The max size of the allocator.
1449 * @param cbInitial The initial allocator size.
1450 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1451 * dependent).
1452 */
1453int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1454{
1455 /*
1456 * Validate input.
1457 */
1458 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1459 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1460 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1461 || cbChunk == 0
1462 || ( RT_IS_POWER_OF_TWO(cbChunk)
1463 && cbChunk >= _1M
1464 && cbChunk <= _256M
1465 && cbChunk <= cbMax),
1466 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1467 VERR_OUT_OF_RANGE);
1468
1469 /*
1470 * Adjust/figure out the chunk size.
1471 */
1472 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1473 {
1474 if (cbMax >= _256M)
1475 cbChunk = _64M;
1476 else
1477 {
1478 if (cbMax < _16M)
1479 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1480 else
1481 cbChunk = (uint32_t)cbMax / 4;
1482 if (!RT_IS_POWER_OF_TWO(cbChunk))
1483 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1484 }
1485 }
1486
1487 if (cbChunk > cbMax)
1488 cbMax = cbChunk;
1489 else
1490 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1491 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1492 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1493
1494 /*
1495 * Allocate and initialize the allocator instance.
1496 */
1497 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1498#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1499 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1500 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1501 cbNeeded += cbBitmap * cMaxChunks;
1502 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1503 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1504#endif
1505#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1506 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1507 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1508#endif
1509 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1510 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1511 VERR_NO_MEMORY);
1512 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1513 pExecMemAllocator->cbChunk = cbChunk;
1514 pExecMemAllocator->cMaxChunks = cMaxChunks;
1515 pExecMemAllocator->cChunks = 0;
1516 pExecMemAllocator->idxChunkHint = 0;
1517 pExecMemAllocator->cAllocations = 0;
1518 pExecMemAllocator->cbTotal = 0;
1519 pExecMemAllocator->cbFree = 0;
1520 pExecMemAllocator->cbAllocated = 0;
1521#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1522 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1523 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1524 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1525 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1526#endif
1527#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1528 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1529#endif
1530 for (uint32_t i = 0; i < cMaxChunks; i++)
1531 {
1532#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1533 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1534 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1535#else
1536 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1537#endif
1538 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1539#ifdef IN_RING0
1540 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1541#else
1542 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1543#endif
1544 }
1545 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1546
1547 /*
1548 * Do the initial allocations.
1549 */
1550 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1551 {
1552 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1553 AssertLogRelRCReturn(rc, rc);
1554 }
1555
1556 pExecMemAllocator->idxChunkHint = 0;
1557
1558 return VINF_SUCCESS;
1559}
1560
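/*
 * Illustration (not part of the build) of how the chunk sizing rules above work
 * out for a few example inputs:
 *      cbMax = 64M,  cbChunk = 0 -> below _256M and at least _16M, so cbChunk = 64M / 4 = 16M
 *                                   (already a power of two); cMaxChunks = 64M / 16M = 4.
 *      cbMax = 512M, cbChunk = 0 -> at least _256M, so cbChunk = _64M; cMaxChunks = 8.
 *      cbMax = 6M,   cbChunk = 0 -> below _16M and at least _4M, so cbChunk = _4M and cbMax
 *                                   is rounded up to 8M; cMaxChunks = 2.
 */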
1561
1562/*********************************************************************************************************************************
1563* Native Recompilation *
1564*********************************************************************************************************************************/
1565
1566
1567/**
1568 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1569 */
1570IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1571{
1572 pVCpu->iem.s.cInstructions += idxInstr;
1573 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1574}
1575
1576
1577/**
1578 * Used by TB code when it wants to raise a \#GP(0).
1579 */
1580IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1584#ifndef _MSC_VER
1585 return VINF_IEM_RAISED_XCPT; /* not reached */
1586#endif
1587}
1588
1589
1590/**
1591 * Reinitializes the native recompiler state.
1592 *
1593 * Called before starting a new recompile job.
1594 */
1595static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1596{
1597 pReNative->cLabels = 0;
1598 pReNative->bmLabelTypes = 0;
1599 pReNative->cFixups = 0;
1600#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1601 pReNative->pDbgInfo->cEntries = 0;
1602#endif
1603 pReNative->pTbOrg = pTb;
1604 pReNative->cCondDepth = 0;
1605 pReNative->uCondSeqNo = 0;
1606 pReNative->uCheckIrqSeqNo = 0;
1607
1608 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1609#if IEMNATIVE_HST_GREG_COUNT < 32
1610 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1611#endif
1612 ;
1613 pReNative->Core.bmHstRegsWithGstShadow = 0;
1614 pReNative->Core.bmGstRegShadows = 0;
1615 pReNative->Core.bmVars = 0;
1616 pReNative->Core.bmStack = 0;
1617 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1618 pReNative->Core.u64ArgVars = UINT64_MAX;
1619
1620 /* Full host register reinit: */
1621 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1622 {
1623 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1624 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1625 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1626 }
1627
1628 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1629 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1630#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1631 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1632#endif
1633#ifdef IEMNATIVE_REG_FIXED_TMP0
1634 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1635#endif
1636 );
1637 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1638 {
1639 fRegs &= ~RT_BIT_32(idxReg);
1640 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1641 }
1642
1643 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1644#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1645 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1646#endif
1647#ifdef IEMNATIVE_REG_FIXED_TMP0
1648 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1649#endif
1650 return pReNative;
1651}
1652
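/*
 * Note on the initial bmHstRegs value set up by iemNativeReInit: the fixed
 * registers (IEMNATIVE_REG_FIXED_MASK) and, on hosts with fewer than 32 GPRs,
 * the bits of the non-existent registers are marked as allocated up front, so
 * none of the allocation helpers further down will ever hand them out.
 */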
1653
1654/**
1655 * Allocates and initializes the native recompiler state.
1656 *
1657 * This is called the first time an EMT wants to recompile something.
1658 *
1659 * @returns Pointer to the new recompiler state.
1660 * @param pVCpu The cross context virtual CPU structure of the calling
1661 * thread.
1662 * @param pTb The TB that's about to be recompiled.
1663 * @thread EMT(pVCpu)
1664 */
1665static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1666{
1667 VMCPU_ASSERT_EMT(pVCpu);
1668
1669 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1670 AssertReturn(pReNative, NULL);
1671
1672 /*
1673 * Try allocate all the buffers and stuff we need.
1674 */
1675 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1676 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1677 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1678#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1679 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1680#endif
1681 if (RT_LIKELY( pReNative->pInstrBuf
1682 && pReNative->paLabels
1683 && pReNative->paFixups)
1684#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1685 && pReNative->pDbgInfo
1686#endif
1687 )
1688 {
1689 /*
1690 * Set the buffer & array sizes on success.
1691 */
1692 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1693 pReNative->cLabelsAlloc = _8K;
1694 pReNative->cFixupsAlloc = _16K;
1695#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1696 pReNative->cDbgInfoAlloc = _16K;
1697#endif
1698
1699 /*
1700 * Done, just need to save it and reinit it.
1701 */
1702 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1703 return iemNativeReInit(pReNative, pTb);
1704 }
1705
1706 /*
1707 * Failed. Cleanup and return.
1708 */
1709 AssertFailed();
1710 RTMemFree(pReNative->pInstrBuf);
1711 RTMemFree(pReNative->paLabels);
1712 RTMemFree(pReNative->paFixups);
1713#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1714 RTMemFree(pReNative->pDbgInfo);
1715#endif
1716 RTMemFree(pReNative);
1717 return NULL;
1718}
1719
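#if 0
/* Illustrative sketch (not compiled): the lookup-or-create pattern that
   iemNativeInit and iemNativeReInit above are meant for, per their doc
   comments.  The wrapper name is made up for the sake of the example. */
static PIEMRECOMPILERSTATE iemNativeGetStateSketch(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (RT_LIKELY(pReNative))
        return iemNativeReInit(pReNative, pTb);     /* reuse the per-EMT state */
    return iemNativeInit(pVCpu, pTb);               /* first recompile on this EMT */
}
#endif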
1720
1721/**
1722 * Creates a label
1723 *
1724 * If the label does not yet have a defined position,
1725 * call iemNativeLabelDefine() later to set it.
1726 *
1727 * @returns Label ID. Throws VBox status code on failure, so no need to check
1728 * the return value.
1729 * @param pReNative The native recompile state.
1730 * @param enmType The label type.
1731 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1732 * label is not yet defined (default).
1733 * @param uData Data associated with the label. Only applicable to
1734 * certain types of labels. Default is zero.
1735 */
1736DECL_HIDDEN_THROW(uint32_t)
1737iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1738 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1739{
1740 /*
1741 * Locate existing label definition.
1742 *
1743 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1744 * and uData is zero.
1745 */
1746 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1747 uint32_t const cLabels = pReNative->cLabels;
1748 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1749#ifndef VBOX_STRICT
1750 && offWhere == UINT32_MAX
1751 && uData == 0
1752#endif
1753 )
1754 {
1755 /** @todo Since this is only used for labels with uData = 0, just use a
1756 * lookup array? */
1757 for (uint32_t i = 0; i < cLabels; i++)
1758 if ( paLabels[i].enmType == enmType
1759 && paLabels[i].uData == uData)
1760 {
1761#ifdef VBOX_STRICT
1762 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1763 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1764#endif
1765 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1766 return i;
1767 }
1768 }
1769
1770 /*
1771 * Make sure we've got room for another label.
1772 */
1773 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1774 { /* likely */ }
1775 else
1776 {
1777 uint32_t cNew = pReNative->cLabelsAlloc;
1778 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1779 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1780 cNew *= 2;
1781 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1782 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1783 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1784 pReNative->paLabels = paLabels;
1785 pReNative->cLabelsAlloc = cNew;
1786 }
1787
1788 /*
1789 * Define a new label.
1790 */
1791 paLabels[cLabels].off = offWhere;
1792 paLabels[cLabels].enmType = enmType;
1793 paLabels[cLabels].uData = uData;
1794 pReNative->cLabels = cLabels + 1;
1795
1796 Assert((unsigned)enmType < 64);
1797 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1798
1799 if (offWhere != UINT32_MAX)
1800 {
1801#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1802 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1803 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1804#endif
1805 }
1806 return cLabels;
1807}
1808
1809
1810/**
1811 * Defines the location of an existing label.
1812 *
1813 * @param pReNative The native recompile state.
1814 * @param idxLabel The label to define.
1815 * @param offWhere The position.
1816 */
1817DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1818{
1819 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1820 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1821 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1822 pLabel->off = offWhere;
1823#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1824 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1825 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1826#endif
1827}
1828
1829
1830/**
1831 * Looks up a label.
1832 *
1833 * @returns Label ID if found, UINT32_MAX if not.
1834 */
1835static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1836 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1837{
1838 Assert((unsigned)enmType < 64);
1839 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1840 {
1841 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1842 uint32_t const cLabels = pReNative->cLabels;
1843 for (uint32_t i = 0; i < cLabels; i++)
1844 if ( paLabels[i].enmType == enmType
1845 && paLabels[i].uData == uData
1846 && ( paLabels[i].off == offWhere
1847 || offWhere == UINT32_MAX
1848 || paLabels[i].off == UINT32_MAX))
1849 return i;
1850 }
1851 return UINT32_MAX;
1852}
1853
1854
1855/**
1856 * Adds a fixup.
1857 *
1858 * @throws VBox status code (int) on failure.
1859 * @param pReNative The native recompile state.
1860 * @param offWhere The instruction offset of the fixup location.
1861 * @param idxLabel The target label ID for the fixup.
1862 * @param enmType The fixup type.
1863 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1864 */
1865DECL_HIDDEN_THROW(void)
1866iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1867 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1868{
1869 Assert(idxLabel <= UINT16_MAX);
1870 Assert((unsigned)enmType <= UINT8_MAX);
1871
1872 /*
1873 * Make sure we've room.
1874 */
1875 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1876 uint32_t const cFixups = pReNative->cFixups;
1877 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1878 { /* likely */ }
1879 else
1880 {
1881 uint32_t cNew = pReNative->cFixupsAlloc;
1882 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1883 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1884 cNew *= 2;
1885 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1886 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1887 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1888 pReNative->paFixups = paFixups;
1889 pReNative->cFixupsAlloc = cNew;
1890 }
1891
1892 /*
1893 * Add the fixup.
1894 */
1895 paFixups[cFixups].off = offWhere;
1896 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1897 paFixups[cFixups].enmType = enmType;
1898 paFixups[cFixups].offAddend = offAddend;
1899 pReNative->cFixups = cFixups + 1;
1900}
1901
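#if 0
/* Illustrative sketch (not compiled): the typical forward-label flow combining
   the three helpers above.  enmLabelType, enmFixupType and offBranch are
   placeholders; real emitters pass the types matching the branch instruction
   they generate. */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType); /* offWhere=UINT32_MAX: forward declaration */
    /* ... emit the branch instruction at native offset offBranch ... */
    iemNativeAddFixup(pReNative, offBranch, idxLabel, enmFixupType);         /* remember what to patch */
    /* ... emit more code ... */
    iemNativeLabelDefine(pReNative, idxLabel, off);                          /* resolve the label here */
#endif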
1902
1903/**
1904 * Slow code path for iemNativeInstrBufEnsure.
1905 */
1906DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1907{
1908 /* Double the buffer size till we meet the request. */
1909 uint32_t cNew = pReNative->cInstrBufAlloc;
1910 AssertReturn(cNew > 0, NULL);
1911 do
1912 cNew *= 2;
1913 while (cNew < off + cInstrReq);
1914
1915 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1916#ifdef RT_ARCH_ARM64
1917 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1918#else
1919 uint32_t const cbMaxInstrBuf = _2M;
1920#endif
1921 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1922
1923 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1924 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1925
1926 pReNative->cInstrBufAlloc = cNew;
1927 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1928}
1929
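/*
 * Illustration of the doubling above: with cInstrBufAlloc = 16K instructions
 * and a request reaching off + cInstrReq = 40K, the loop goes 16K -> 32K -> 64K
 * entries before reallocating, which is still comfortably below the
 * cbMaxInstrBuf cap on either host architecture.
 */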
1930#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1931
1932/**
1933 * Grows the static debug info array used during recompilation.
1934 *
1935 * @returns Pointer to the new debug info block; throws VBox status code on
1936 * failure, so no need to check the return value.
1937 */
1938DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1939{
1940 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1941 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1942 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1943 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1944 pReNative->pDbgInfo = pDbgInfo;
1945 pReNative->cDbgInfoAlloc = cNew;
1946 return pDbgInfo;
1947}
1948
1949
1950/**
1951 * Adds a new debug info uninitialized entry, returning the pointer to it.
1952 */
1953DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1954{
1955 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1956 { /* likely */ }
1957 else
1958 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1959 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1960}
1961
1962
1963/**
1964 * Debug Info: Adds a native offset record, if necessary.
1965 */
1966static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1967{
1968 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1969
1970 /*
1971 * Search backwards to see if we've got a similar record already.
1972 */
1973 uint32_t idx = pDbgInfo->cEntries;
1974 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1975 while (idx-- > idxStop)
1976 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1977 {
1978 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1979 return;
1980 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1981 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1982 break;
1983 }
1984
1985 /*
1986 * Add it.
1987 */
1988 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1989 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1990 pEntry->NativeOffset.offNative = off;
1991}
1992
1993
1994/**
1995 * Debug Info: Record info about a label.
1996 */
1997static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
1998{
1999 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2000 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2001 pEntry->Label.uUnused = 0;
2002 pEntry->Label.enmLabel = (uint8_t)enmType;
2003 pEntry->Label.uData = uData;
2004}
2005
2006
2007/**
2008 * Debug Info: Record info about a threaded call.
2009 */
2010static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2011{
2012 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2013 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2014 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2015 pEntry->ThreadedCall.uUnused = 0;
2016 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2017}
2018
2019
2020/**
2021 * Debug Info: Record info about a new guest instruction.
2022 */
2023static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2024{
2025 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2026 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2027 pEntry->GuestInstruction.uUnused = 0;
2028 pEntry->GuestInstruction.fExec = fExec;
2029}
2030
2031
2032/**
2033 * Debug Info: Record info about guest register shadowing.
2034 */
2035static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2036 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2037{
2038 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2039 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2040 pEntry->GuestRegShadowing.uUnused = 0;
2041 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2042 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2043 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2044}
2045
2046#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2047
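/*
 * Note on the debug info helpers above: the payload records (labels, threaded
 * calls, guest instructions, register shadowing) only make sense relative to a
 * position in the native code, which is why callers pair them with a
 * kIemTbDbgEntryType_NativeOffset record first - see the
 * iemNativeDbgInfoAddNativeOffset + iemNativeDbgInfoAddLabel sequence in
 * iemNativeLabelCreate above for an example of the pattern.
 */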
2048
2049/*********************************************************************************************************************************
2050* Register Allocator *
2051*********************************************************************************************************************************/
2052
2053/**
2054 * Register parameter indexes (indexed by argument number).
2055 */
2056DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2057{
2058 IEMNATIVE_CALL_ARG0_GREG,
2059 IEMNATIVE_CALL_ARG1_GREG,
2060 IEMNATIVE_CALL_ARG2_GREG,
2061 IEMNATIVE_CALL_ARG3_GREG,
2062#if defined(IEMNATIVE_CALL_ARG4_GREG)
2063 IEMNATIVE_CALL_ARG4_GREG,
2064# if defined(IEMNATIVE_CALL_ARG5_GREG)
2065 IEMNATIVE_CALL_ARG5_GREG,
2066# if defined(IEMNATIVE_CALL_ARG6_GREG)
2067 IEMNATIVE_CALL_ARG6_GREG,
2068# if defined(IEMNATIVE_CALL_ARG7_GREG)
2069 IEMNATIVE_CALL_ARG7_GREG,
2070# endif
2071# endif
2072# endif
2073#endif
2074};
2075
2076/**
2077 * Call register masks indexed by argument count.
2078 */
2079DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2080{
2081 0,
2082 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2083 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2084 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2085 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2086 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2087#if defined(IEMNATIVE_CALL_ARG4_GREG)
2088 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2089 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2090# if defined(IEMNATIVE_CALL_ARG5_GREG)
2091 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2092 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2093# if defined(IEMNATIVE_CALL_ARG6_GREG)
2094 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2095 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2096 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2097# if defined(IEMNATIVE_CALL_ARG7_GREG)
2098 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2099 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2100 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2101# endif
2102# endif
2103# endif
2104#endif
2105};
2106
2107#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2108/**
2109 * BP offset of the stack argument slots.
2110 *
2111 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2112 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2113 */
2114DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2115{
2116 IEMNATIVE_FP_OFF_STACK_ARG0,
2117# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2118 IEMNATIVE_FP_OFF_STACK_ARG1,
2119# endif
2120# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2121 IEMNATIVE_FP_OFF_STACK_ARG2,
2122# endif
2123# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2124 IEMNATIVE_FP_OFF_STACK_ARG3,
2125# endif
2126};
2127AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2128#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2129
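#if 0
/* Illustrative sketch (not compiled): where stack argument slots exist
   (IEMNATIVE_FP_OFF_STACK_ARG0 is defined), the BP-relative table above gives
   their frame offsets.  The index math follows the table's doc comment. */
# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    uint8_t const iArg      = IEMNATIVE_CALL_ARG_GREG_COUNT;   /* the first argument that doesn't fit a register */
    int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[iArg - IEMNATIVE_CALL_ARG_GREG_COUNT];
    /* ... emit a store of the argument value to [frame pointer + offBpDisp] before the call ... */
# endif
#endif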
2130/**
2131 * Info about shadowed guest register values.
2132 * @see IEMNATIVEGSTREG
2133 */
2134static struct
2135{
2136 /** Offset in VMCPU. */
2137 uint32_t off;
2138 /** The field size. */
2139 uint8_t cb;
2140 /** Name (for logging). */
2141 const char *pszName;
2142} const g_aGstShadowInfo[] =
2143{
2144#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2145 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2146 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2147 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2148 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2151 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2152 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2153 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2154 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2155 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2156 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2157 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2158 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2159 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2160 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2161 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2162 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2163 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2164 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2165 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2166 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2167 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2168 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2169 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2170 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2171 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2172 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2173 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2174 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2175 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2176 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2177 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2178 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2179 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2180 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2181#undef CPUMCTX_OFF_AND_SIZE
2182};
2183AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2184
2185
2186/** Host CPU general purpose register names. */
2187DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2188{
2189#ifdef RT_ARCH_AMD64
2190 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2191#elif defined(RT_ARCH_ARM64)
2192 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2193 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2194#else
2195# error "port me"
2196#endif
2197};
2198
2199
2200DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2201 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2202{
2203 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2204
2205 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2206 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2207 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2208 return (uint8_t)idxReg;
2209}
2210
2211
2212/**
2213 * Tries to locate a suitable register in the given register mask.
2214 *
2215 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2216 * failed.
2217 *
2218 * @returns Host register number on success, returns UINT8_MAX on failure.
2219 */
2220static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2221{
2222 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2223 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2224 if (fRegs)
2225 {
2226 /** @todo pick better here: */
2227 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2228
2229 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2230 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2231 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2232 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2233
2234 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2235 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2236 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2237 return idxReg;
2238 }
2239 return UINT8_MAX;
2240}
2241
2242
2243/**
2244 * Locate a register, possibly freeing one up.
2245 *
2246 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2247 * failed.
2248 *
2249 * @returns Host register number on success. Returns UINT8_MAX if no registers
2250 * found, the caller is supposed to deal with this and raise an
2251 * allocation type specific status code (if desired).
2252 *
2253 * @throws VBox status code if we run into trouble spilling a variable or
2254 * recording debug info. Does NOT throw anything if we're out of
2255 * registers, though.
2256 */
2257static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2258 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2259{
2260 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2261 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2262
2263 /*
2264 * Try a freed register that's shadowing a guest register
2265 */
2266 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2267 if (fRegs)
2268 {
2269 unsigned const idxReg = (fPreferVolatile
2270 ? ASMBitFirstSetU32(fRegs)
2271 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2272 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2273 - 1;
2274
2275 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2276 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2277 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2278 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2279
2280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2281 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2282 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2283 return idxReg;
2284 }
2285
2286 /*
2287 * Try free up a variable that's in a register.
2288 *
2289 * We do two rounds here, first evacuating variables we don't need to be
2290 * saved on the stack, then in the second round move things to the stack.
2291 */
2292 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2293 {
2294 uint32_t fVars = pReNative->Core.bmVars;
2295 while (fVars)
2296 {
2297 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2298 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2299 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2300 && (RT_BIT_32(idxReg) & fRegMask)
2301 && ( iLoop == 0
2302 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2303 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2304 {
2305 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2306 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2307 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2308 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2309 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2310
2311 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2312 {
2313 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
2314 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2315 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff,
2316 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2317 - IEMNATIVE_FP_OFF_STACK_VARS,
2318 idxReg);
2319 }
2320
2321 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2322 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2323 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2324 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2325 return idxReg;
2326 }
2327 fVars &= ~RT_BIT_32(idxVar);
2328 }
2329 }
2330
2331 return UINT8_MAX;
2332}
2333
2334
2335/**
2336 * Moves a variable to a different register or spills it onto the stack.
2337 *
2338 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2339 * kinds can easily be recreated if needed later.
2340 *
2341 * @returns The new code buffer position, UINT32_MAX on failure.
2342 * @param pReNative The native recompile state.
2343 * @param off The current code buffer position.
2344 * @param idxVar The variable index.
2345 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2346 * call-volatile registers.
2347 */
2348static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2349 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2350{
2351 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2352 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2353
2354 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2355 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2356 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2357 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2358 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2359 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2360 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2361 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2362
2363
2364 /** @todo Add statistics on this.*/
2365 /** @todo Implement basic variable liveness analysis (python) so variables
2366 * can be freed immediately once they are no longer used. As it stands, we
2367 * risk wasting registers and stack slots on dead variables. */
2368
2369 /*
2370 * First try move it to a different register, as that's cheaper.
2371 */
2372 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2373 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2374 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2375 if (fRegs)
2376 {
2377 /* Avoid using shadow registers, if possible. */
2378 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2379 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2380 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2381
2382 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2383 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2384 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2385 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2386 if (fGstRegShadows)
2387 {
2388 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2389 while (fGstRegShadows)
2390 {
2391 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2392 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2393
2394 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2395 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2396 }
2397 }
2398
2399 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2400 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2401 }
2402 /*
2403 * Otherwise we must spill the register onto the stack.
2404 */
2405 else
2406 {
2407 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2408 off = iemNativeEmitStoreGprByBp(pReNative, off,
2409 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2410 - IEMNATIVE_FP_OFF_STACK_VARS,
2411 idxRegOld);
2412
2413 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2414 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2415 }
2416
2417 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2418 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2419 return off;
2420}
2421
2422
2423/**
2424 * Allocates a temporary host general purpose register.
2425 *
2426 * This may emit code to save register content onto the stack in order to free
2427 * up a register.
2428 *
2429 * @returns The host register number; throws VBox status code on failure,
2430 * so no need to check the return value.
2431 * @param pReNative The native recompile state.
2432 * @param poff Pointer to the variable with the code buffer position.
2433 * This will be updated if we need to move a variable from
2434 * register to stack in order to satisfy the request.
2435 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2436 * registers (@c true, default) or the other way around
2437 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2438 */
2439DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2440{
2441 /*
2442 * Try find a completely unused register, preferably a call-volatile one.
2443 */
2444 uint8_t idxReg;
2445 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2446 & ~pReNative->Core.bmHstRegsWithGstShadow
2447 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2448 if (fRegs)
2449 {
2450 if (fPreferVolatile)
2451 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2452 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2453 else
2454 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2455 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2456 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2457 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2458 }
2459 else
2460 {
2461 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2462 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2463 }
2464 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2465}
2466
2467
2468/**
2469 * Allocates a temporary register for loading an immediate value into.
2470 *
2471 * This will emit code to load the immediate, unless there happens to be an
2472 * unused register with the value already loaded.
2473 *
2474 * The caller will not modify the returned register, it must be considered
2475 * read-only. Free using iemNativeRegFreeTmpImm.
2476 *
2477 * @returns The host register number; throws VBox status code on failure, so no
2478 * need to check the return value.
2479 * @param pReNative The native recompile state.
2480 * @param poff Pointer to the variable with the code buffer position.
2481 * @param uImm The immediate value that the register must hold upon
2482 * return.
2483 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2484 * registers (@c true, default) or the other way around
2485 * (@c false).
2486 *
2487 * @note Reusing immediate values has not been implemented yet.
2488 */
2489DECL_HIDDEN_THROW(uint8_t)
2490iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2491{
2492 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2493 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2494 return idxReg;
2495}
2496
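#if 0
/* Illustrative sketch (not compiled): allocating and releasing temporary host
   registers with the helpers above; the free helpers are defined further down.
   The emitter calls that would use the registers are elided. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xcafe)); /* read-only by contract */
    /* ... emit code using idxTmpReg and idxImmReg ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg);
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif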
2497
2498/**
2499 * Marks host register @a idxHstReg as containing a shadow copy of guest
2500 * register @a enmGstReg.
2501 *
2502 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2503 * host register before calling.
2504 */
2505DECL_FORCE_INLINE(void)
2506iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2507{
2508 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2509
2510 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2511 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2512 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2513 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2514#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2515 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2516 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2517#else
2518 RT_NOREF(off);
2519#endif
2520}
2521
2522
2523/**
2524 * Clear any guest register shadow claims from @a idxHstReg.
2525 *
2526 * The register does not need to be shadowing any guest registers.
2527 */
2528DECL_FORCE_INLINE(void)
2529iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2530{
2531 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2532 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2533 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2534 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2535
2536#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2537 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2538 if (fGstRegs)
2539 {
2540 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2541 while (fGstRegs)
2542 {
2543 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2544 fGstRegs &= ~RT_BIT_64(iGstReg);
2545 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2546 }
2547 }
2548#else
2549 RT_NOREF(off);
2550#endif
2551
2552 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2553 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2554 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2555}
2556
2557
2558/**
2559 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2560 * to @a idxRegTo.
2561 */
2562DECL_FORCE_INLINE(void)
2563iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2564 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2565{
2566 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2567 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2568 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2569 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2570 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2571
2572 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2573 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2574 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2575#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2576 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2577 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2578#else
2579 RT_NOREF(off);
2580#endif
2581}
2582
2583
2584/**
2585 * Allocates a temporary host general purpose register for keeping a guest
2586 * register value.
2587 *
2588 * Since we may already have a register holding the guest register value,
2589 * code will be emitted to do the loading if that's not the case. Code may also
2590 * be emitted if we have to free up a register to satify the request.
2591 *
2592 * @returns The host register number; throws VBox status code on failure, so no
2593 * need to check the return value.
2594 * @param pReNative The native recompile state.
2595 * @param poff Pointer to the variable with the code buffer
2596 * position. This will be updated if we need to move a
2597 * variable from register to stack in order to satisfy
2598 * the request.
2599 * @param enmGstReg The guest register that is to be updated.
2600 * @param enmIntendedUse How the caller will be using the host register.
2601 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2602 */
2603DECL_HIDDEN_THROW(uint8_t)
2604iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2605 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2606{
2607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2608#ifdef LOG_ENABLED
2609 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2610#endif
2611
2612 /*
2613 * First check if the guest register value is already in a host register.
2614 */
2615 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2616 {
2617 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2618 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2619 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2620 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2621
2622 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2623 {
2624 /*
2625 * If the register will trash the guest shadow copy, try find a
2626 * completely unused register we can use instead. If that fails,
2627 * we need to disassociate the host reg from the guest reg.
2628 */
2629 /** @todo would be nice to know if preserving the register is in any way helpful. */
2630 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2631 && ( ~pReNative->Core.bmHstRegs
2632 & ~pReNative->Core.bmHstRegsWithGstShadow
2633 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2634 {
2635 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2636
2637 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2638
2639 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2640 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2641 g_apszIemNativeHstRegNames[idxRegNew]));
2642 idxReg = idxRegNew;
2643 }
2644 else
2645 {
2646 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2647 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2648 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2649 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2650 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2651 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2652 else
2653 {
2654 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2655 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2656 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2657 }
2658 }
2659 }
2660 else
2661 {
2662 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2663 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2664
2665 /*
2666 * Allocate a new register, copy the value and, if updating, the
2667 * guest shadow copy assignment to the new register.
2668 */
2669 /** @todo share register for readonly access. */
2670 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2671
2672 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2673
2674 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2675 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2676 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2677 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2678 else
2679 {
2680 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2681 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2682 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2683 g_apszIemNativeHstRegNames[idxRegNew]));
2684 }
2685 idxReg = idxRegNew;
2686 }
2687
2688#ifdef VBOX_STRICT
2689 /* Strict builds: Check that the value is correct. */
2690 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2691#endif
2692
2693 return idxReg;
2694 }
2695
2696 /*
2697 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
2698 */
2699 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2700
2701 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2702
2703 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2704 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2705 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2706 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2707
2708 return idxRegNew;
2709}
2710
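#if 0
/* Illustrative sketch (not compiled): getting a host register that shadows a
   guest GPR for updating.  The guest register enum arithmetic mirrors the
   g_aGstShadowInfo table above; the register is released with the
   iemNativeRegFreeTmp helper defined further down. */
    uint8_t const idxRaxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                              (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                              kIemNativeGstRegUse_ForUpdate);
    /* ... emit code updating the value held in idxRaxReg ... */
    iemNativeRegFreeTmp(pReNative, idxRaxReg);
#endif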
2711
2712/**
2713 * Allocates a temporary host general purpose register that already holds the
2714 * given guest register value.
2715 *
2716 * The use case for this function is places where the shadowing state cannot be
2717 * modified due to branching and such. This will fail if the we don't have a
2718 * current shadow copy handy or if it's incompatible. The only code that will
2719 * be emitted here is value checking code in strict builds.
2720 *
2721 * The intended use can only be readonly!
2722 *
2723 * @returns The host register number, UINT8_MAX if not present.
2724 * @param pReNative The native recompile state.
2725 * @param poff Pointer to the instruction buffer offset.
2726 * Will be updated in strict builds if a register is
2727 * found.
2728 * @param enmGstReg The guest register that is to be read.
2729 * @note In strict builds, this may throw instruction buffer growth failures.
2730 * Non-strict builds will not throw anything.
2731 * @sa iemNativeRegAllocTmpForGuestReg
2732 */
2733DECL_HIDDEN_THROW(uint8_t)
2734iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2735{
2736 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2737
2738 /*
2739 * First check if the guest register value is already in a host register.
2740 */
2741 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2742 {
2743 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2744 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2745 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2746 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2747
2748 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2749 {
2750 /*
2751 * We only do readonly use here, so easy compared to the other
2752 * variant of this code.
2753 */
2754 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2755 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2756 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2757 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2758 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2759
2760#ifdef VBOX_STRICT
2761 /* Strict builds: Check that the value is correct. */
2762 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2763#else
2764 RT_NOREF(poff);
2765#endif
2766 return idxReg;
2767 }
2768 }
2769
2770 return UINT8_MAX;
2771}
2772
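#if 0
/* Illustrative sketch (not compiled): the read-only fast path offered by the
   helper above, with the UINT8_MAX fallback the doc comment requires callers
   to handle. */
    uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
    if (idxRegEfl != UINT8_MAX)
    {
        /* ... use the shadowed eflags value strictly read-only ... */
        iemNativeRegFreeTmp(pReNative, idxRegEfl);
    }
    /* else: no shadow copy handy; the caller must get at the value some other way. */
#endif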
2773
2774DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2775
2776
2777/**
2778 * Allocates argument registers for a function call.
2779 *
2780 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2781 * need to check the return value.
2782 * @param pReNative The native recompile state.
2783 * @param off The current code buffer offset.
2784 * @param cArgs The number of arguments the function call takes.
2785 */
2786DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2787{
2788 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2789 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2790 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2791 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2792
2793 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2794 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2795 else if (cArgs == 0)
2796 return off;
2797
2798 /*
2799 * Do we get lucky and all registers are free and not shadowing anything?
2800 */
2801 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2802 for (uint32_t i = 0; i < cArgs; i++)
2803 {
2804 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2805 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2806 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2807 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2808 }
2809 /*
2810 * Okay, not lucky so we have to free up the registers.
2811 */
2812 else
2813 for (uint32_t i = 0; i < cArgs; i++)
2814 {
2815 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2816 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2817 {
2818 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2819 {
2820 case kIemNativeWhat_Var:
2821 {
2822 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2823 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2824 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2825 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2826 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2827
2828 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2829 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2830 else
2831 {
2832 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2833 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2834 }
2835 break;
2836 }
2837
2838 case kIemNativeWhat_Tmp:
2839 case kIemNativeWhat_Arg:
2840 case kIemNativeWhat_rc:
2841 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2842 default:
2843 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2844 }
2845
2846 }
2847 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2848 {
2849 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2850 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2851 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2852 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2853 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2854 }
2855 else
2856 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2857 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2858 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2859 }
2860 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2861 return off;
2862}
2863
2864
2865DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2866
2867
2868#if 0
2869/**
2870 * Frees a register assignment of any type.
2871 *
2872 * @param pReNative The native recompile state.
2873 * @param idxHstReg The register to free.
2874 *
2875 * @note Does not update variables.
2876 */
2877DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2878{
2879 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2880 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2881 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2882 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2883 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2884 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2885 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2886 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2887 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2888 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2889 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2890 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2891 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2892 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2893
2894 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2895 /* no flushing, right:
2896 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2897 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2898 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2899 */
2900}
2901#endif
2902
2903
2904/**
2905 * Frees a temporary register.
2906 *
2907 * Any shadow copies of guest registers assigned to the host register will not
2908 * be flushed by this operation.
2909 */
2910DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2911{
2912 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2913 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2914 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2915 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2916 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2917}
2918
2919
2920/**
2921 * Frees a temporary immediate register.
2922 *
2923 * It is assumed that the call has not modified the register, so it still holds
2924 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2925 */
2926DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2927{
2928 iemNativeRegFreeTmp(pReNative, idxHstReg);
2929}
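
/*
 * Illustrative sketch (hypothetical helper, not used by the recompiler): the
 * expected allocate/use/free pairing for a temporary immediate register.  It
 * assumes iemNativeRegAllocTmpImm() takes (pReNative, &off, uImm), analogous
 * to how iemNativeRegAllocTmp(pReNative, &off) is used later in this file;
 * the helper name and the constant are examples only.
 */
#if 0
static uint32_t iemNativeExampleUseTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Allocate a temporary register loaded with the constant 0xfff. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfff));
    /* ... emit code that only reads idxRegImm here ... */
    /* Free it again without having modified it, as the API above assumes. */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    return off;
}
#endif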
2930
2931
2932/**
2933 * Called right before emitting a call instruction to move anything important
2934 * out of call-volatile registers, free and flush the call-volatile registers,
2935 * optionally freeing argument variables.
2936 *
2937 * @returns New code buffer offset; throws VBox status code on error.
2938 * @param pReNative The native recompile state.
2939 * @param off The code buffer offset.
2940 * @param cArgs The number of arguments the function call takes.
2941 *                      It is presumed that the host register part of these has
2942 *                      already been allocated as such and won't need moving,
2943 *                      just freeing.
2944 */
2945DECL_HIDDEN_THROW(uint32_t)
2946iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2947{
2948 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
2949
2950 /*
2951 * Move anything important out of volatile registers.
2952 */
2953 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2954 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2955 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2956#ifdef IEMNATIVE_REG_FIXED_TMP0
2957 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2958#endif
2959 & ~g_afIemNativeCallRegs[cArgs];
2960
2961 fRegsToMove &= pReNative->Core.bmHstRegs;
2962 if (!fRegsToMove)
2963 { /* likely */ }
2964 else
2965 while (fRegsToMove != 0)
2966 {
2967 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2968 fRegsToMove &= ~RT_BIT_32(idxReg);
2969
2970 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2971 {
2972 case kIemNativeWhat_Var:
2973 {
2974 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2975 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2976 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2977 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2978 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2979 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2980 else
2981 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2982 continue;
2983 }
2984
2985 case kIemNativeWhat_Arg:
2986 AssertMsgFailed(("What?!?: %u\n", idxReg));
2987 continue;
2988
2989 case kIemNativeWhat_rc:
2990 case kIemNativeWhat_Tmp:
2991 AssertMsgFailed(("Missing free: %u\n", idxReg));
2992 continue;
2993
2994 case kIemNativeWhat_FixedTmp:
2995 case kIemNativeWhat_pVCpuFixed:
2996 case kIemNativeWhat_pCtxFixed:
2997 case kIemNativeWhat_FixedReserved:
2998 case kIemNativeWhat_Invalid:
2999 case kIemNativeWhat_End:
3000 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3001 }
3002 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3003 }
3004
3005 /*
3006 * Do the actual freeing.
3007 */
3008 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3009
3010    /* If there are guest register shadows in any call-volatile register, we
3011       have to clear the corresponding guest register masks for each register. */
3012 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3013 if (fHstRegsWithGstShadow)
3014 {
3015 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3016 do
3017 {
3018 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3019            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3020
3021 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3022 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3023 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3024 } while (fHstRegsWithGstShadow != 0);
3025 }
3026
3027 return off;
3028}
3029
3030
3031/**
3032 * Flushes a set of guest register shadow copies.
3033 *
3034 * This is usually done after calling a threaded function or a C-implementation
3035 * of an instruction.
3036 *
3037 * @param pReNative The native recompile state.
3038 * @param fGstRegs Set of guest registers to flush.
3039 */
3040DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3041{
3042 /*
3043 * Reduce the mask by what's currently shadowed
3044 */
3045 fGstRegs &= pReNative->Core.bmGstRegShadows;
3046 if (fGstRegs)
3047 {
3048 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3049 if (pReNative->Core.bmGstRegShadows)
3050 {
3051 /*
3052 * Partial.
3053 */
3054 do
3055 {
3056 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3057 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3058 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3059 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3060 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3061
3062 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3063 fGstRegs &= ~fInThisHstReg;
3064 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3065 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3066 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3067 } while (fGstRegs != 0);
3068 }
3069 else
3070 {
3071 /*
3072 * Clear all.
3073 */
3074 do
3075 {
3076 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3077 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3078 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3079 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3080 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3081
3082 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3083 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3084 } while (fGstRegs != 0);
3085 pReNative->Core.bmHstRegsWithGstShadow = 0;
3086 }
3087 }
3088}
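
/*
 * Illustrative sketch (hypothetical helper): building the fGstRegs mask for
 * iemNativeRegFlushGuestShadows() from individual guest registers, here RAX
 * and EFLAGS, before emitting code that clobbers their shadow copies.
 */
#if 0
static void iemNativeExampleFlushRaxAndEflShadows(PIEMRECOMPILERSTATE pReNative)
{
    uint64_t const fGstRegs = RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX)
                            | RT_BIT_64(kIemNativeGstReg_EFlags);
    iemNativeRegFlushGuestShadows(pReNative, fGstRegs);
}
#endif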
3089
3090
3091/**
3092 * Flushes any delayed guest register writes.
3093 *
3094 * This must be called prior to calling CImpl functions and any helpers that use
3095 * the guest state (like raising exceptions) and such.
3096 *
3097 * This optimization has not yet been implemented. The first target would be
3098 * RIP updates, since these are the most common ones.
3099 */
3100DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3101{
3102 RT_NOREF(pReNative, off);
3103 return off;
3104}
3105
3106
3107/*********************************************************************************************************************************
3108* Code Emitters (larger snippets) *
3109*********************************************************************************************************************************/
3110
3111/**
3112 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3113 * extending to 64-bit width.
3114 *
3115 * @returns New code buffer offset; throws VBox status code on error.
3116 * @param   pReNative   The native recompile state.
3117 * @param off The current code buffer position.
3118 * @param idxHstReg The host register to load the guest register value into.
3119 * @param enmGstReg The guest register to load.
3120 *
3121 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3122 * that is something the caller needs to do if applicable.
3123 */
3124DECL_HIDDEN_THROW(uint32_t)
3125iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3126{
3127 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3128 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3129
3130 switch (g_aGstShadowInfo[enmGstReg].cb)
3131 {
3132 case sizeof(uint64_t):
3133 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3134 case sizeof(uint32_t):
3135 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3136 case sizeof(uint16_t):
3137 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3138#if 0 /* not present in the table. */
3139 case sizeof(uint8_t):
3140 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3141#endif
3142 default:
3143 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3144 }
3145}
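
/*
 * Illustrative usage sketch: loading the guest PC into a freshly allocated
 * temporary register.  Per the @note above, the shadow association is not
 * registered automatically; the caller has to do that separately if it wants
 * to keep it.
 */
#if 0
uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxTmpReg, kIemNativeGstReg_Pc);
/* ... use idxTmpReg ... */
iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif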
3146
3147
3148#ifdef VBOX_STRICT
3149/**
3150 * Emits code that checks that the content of register @a idxReg is the same
3151 * as what's in the guest register @a enmGstReg, executing a breakpoint
3152 * instruction if that's not the case.
3153 *
3154 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3155 * Trashes EFLAGS on AMD64.
3156 */
3157static uint32_t
3158iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3159{
3160# ifdef RT_ARCH_AMD64
3161 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3162
3163 /* cmp reg, [mem] */
3164 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3165 {
3166 if (idxReg >= 8)
3167 pbCodeBuf[off++] = X86_OP_REX_R;
3168 pbCodeBuf[off++] = 0x38;
3169 }
3170 else
3171 {
3172 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3173 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3174 else
3175 {
3176 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3177 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3178 else
3179 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3181 if (idxReg >= 8)
3182 pbCodeBuf[off++] = X86_OP_REX_R;
3183 }
3184 pbCodeBuf[off++] = 0x39;
3185 }
3186 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3187
3188 /* je/jz +1 */
3189 pbCodeBuf[off++] = 0x74;
3190 pbCodeBuf[off++] = 0x01;
3191
3192 /* int3 */
3193 pbCodeBuf[off++] = 0xcc;
3194
3195 /* For values smaller than the register size, we must check that the rest
3196 of the register is all zeros. */
3197 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3198 {
3199 /* test reg64, imm32 */
3200 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3201 pbCodeBuf[off++] = 0xf7;
3202 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3203 pbCodeBuf[off++] = 0;
3204 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3205 pbCodeBuf[off++] = 0xff;
3206 pbCodeBuf[off++] = 0xff;
3207
3208 /* je/jz +1 */
3209 pbCodeBuf[off++] = 0x74;
3210 pbCodeBuf[off++] = 0x01;
3211
3212 /* int3 */
3213 pbCodeBuf[off++] = 0xcc;
3214 }
3215 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3216 {
3217 /* rol reg64, 32 */
3218 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3219 pbCodeBuf[off++] = 0xc1;
3220 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3221 pbCodeBuf[off++] = 32;
3222
3223 /* test reg32, ffffffffh */
3224 if (idxReg >= 8)
3225 pbCodeBuf[off++] = X86_OP_REX_B;
3226 pbCodeBuf[off++] = 0xf7;
3227 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3228 pbCodeBuf[off++] = 0xff;
3229 pbCodeBuf[off++] = 0xff;
3230 pbCodeBuf[off++] = 0xff;
3231 pbCodeBuf[off++] = 0xff;
3232
3233 /* je/jz +1 */
3234 pbCodeBuf[off++] = 0x74;
3235 pbCodeBuf[off++] = 0x01;
3236
3237 /* int3 */
3238 pbCodeBuf[off++] = 0xcc;
3239
3240 /* rol reg64, 32 */
3241 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3242 pbCodeBuf[off++] = 0xc1;
3243 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3244 pbCodeBuf[off++] = 32;
3245 }
3246
3247# elif defined(RT_ARCH_ARM64)
3248 /* mov TMP0, [gstreg] */
3249 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3250
3251 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3252 /* sub tmp0, tmp0, idxReg */
3253 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3254 /* cbz tmp0, +1 */
3255 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3256 /* brk #0x1000+enmGstReg */
3257 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3258
3259# else
3260# error "Port me!"
3261# endif
3262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3263 return off;
3264}
3265#endif /* VBOX_STRICT */
3266
3267
3268
3269/**
3270 * Emits code for checking the return code of a call and rcPassUp, returning
3271 * from the generated code if either is non-zero.
3272 */
3273DECL_HIDDEN_THROW(uint32_t)
3274iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3275{
3276#ifdef RT_ARCH_AMD64
3277 /*
3278 * AMD64: eax = call status code.
3279 */
3280
3281 /* edx = rcPassUp */
3282 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3283# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3284 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3285# endif
3286
3287 /* edx = eax | rcPassUp */
3288 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3289 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3290 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3291 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3292
3293 /* Jump to non-zero status return path. */
3294 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3295
3296 /* done. */
3297
3298#elif RT_ARCH_ARM64
3299 /*
3300 * ARM64: w0 = call status code.
3301 */
3302 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3303 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3304
3305 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3306
3307 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3308
3309 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3310 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3311 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3312
3313#else
3314# error "port me"
3315#endif
3316 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3317 return off;
3318}
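
/*
 * Roughly what the emitted code corresponds to in C (sketch only; the actual
 * non-zero case is handled via the NonZeroRetOrPassUp label, see
 * iemNativeEmitRcFiddling below).
 */
#if 0
static bool iemNativeExampleRetOrPassUpIsNonZero(PVMCPUCC pVCpu, int rcCall)
{
    return (rcCall | pVCpu->iem.s.rcPassUp) != 0; /* branch to NonZeroRetOrPassUp when true */
}
#endif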
3319
3320
3321/**
3322 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3323 * raising a \#GP(0) if it isn't.
3324 *
3325 * @returns New code buffer offset; throws VBox status code on error.
3326 * @param pReNative The native recompile state.
3327 * @param off The code buffer offset.
3328 * @param idxAddrReg The host register with the address to check.
3329 * @param idxInstr The current instruction.
3330 */
3331DECL_HIDDEN_THROW(uint32_t)
3332iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3333{
3334 RT_NOREF(idxInstr);
3335
3336 /*
3337 * Make sure we don't have any outstanding guest register writes as we may
3338     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3339 */
3340 off = iemNativeRegFlushPendingWrites(pReNative, off);
3341
3342#ifdef RT_ARCH_AMD64
3343 /*
3344 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3345 * return raisexcpt();
3346     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3347 */
3348 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3349
3350 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3351 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3352 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3353 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3354
3355# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3356 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3357# else
3358 uint32_t const offFixup = off;
3359 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3360 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3361 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3362 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3363# endif
3364
3365 iemNativeRegFreeTmp(pReNative, iTmpReg);
3366
3367#elif defined(RT_ARCH_ARM64)
3368 /*
3369 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3370 * return raisexcpt();
3371 * ----
3372 * mov x1, 0x800000000000
3373 * add x1, x0, x1
3374 * cmp xzr, x1, lsr 48
3375 * and either:
3376 * b.ne .Lraisexcpt
3377 * or:
3378 * b.eq .Lnoexcept
3379 * movz x1, #instruction-number
3380 * b .Lraisexcpt
3381 * .Lnoexcept:
3382 */
3383 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3384
3385 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3386 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3387    off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3388
3389# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3390 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3391# else
3392 uint32_t const offFixup = off;
3393 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3394 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3395 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3396 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3397# endif
3398
3399 iemNativeRegFreeTmp(pReNative, iTmpReg);
3400
3401#else
3402# error "Port me"
3403#endif
3404 return off;
3405}
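
/*
 * The emitted check corresponds roughly to this C expression (sketch), using
 * the AMD64 variant described in the comment above: add 0x8000 to the upper
 * 32 bits and require the result to fit in 16 bits.
 */
#if 0
static bool iemNativeExampleIsNonCanonical(uint64_t uAddr)
{
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) != 0;
}
#endif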
3406
3407
3408/**
3409 * Emits code to check if the content of @a idxAddrReg is within the limit of
3410 * idxSegReg, raising a \#GP(0) if it isn't.
3411 *
3412 * @returns New code buffer offset; throws VBox status code on error.
3413 * @param pReNative The native recompile state.
3414 * @param off The code buffer offset.
3415 * @param idxAddrReg The host register (32-bit) with the address to
3416 * check.
3417 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3418 * against.
3419 * @param idxInstr The current instruction.
3420 */
3421DECL_HIDDEN_THROW(uint32_t)
3422iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3423 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3424{
3425 /*
3426 * Make sure we don't have any outstanding guest register writes as we may
3427     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3428 */
3429 off = iemNativeRegFlushPendingWrites(pReNative, off);
3430
3431 /** @todo implement expand down/whatnot checking */
3432 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3433
3434 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3435 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3436 kIemNativeGstRegUse_ForUpdate);
3437
3438 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3439
3440#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3441 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3442 RT_NOREF(idxInstr);
3443#else
3444 uint32_t const offFixup = off;
3445 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3446 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3447 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3448 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3449#endif
3450
3451 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3452 return off;
3453}
3454
3455
3456/**
3457 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
3458 *
3459 * @returns The flush mask.
3460 * @param fCImpl The IEM_CIMPL_F_XXX flags.
3461 * @param fGstShwFlush The starting flush mask.
3462 */
3463DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
3464{
3465 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
3466 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
3467 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
3468 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
3469 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
3470 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
3471 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
3472 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
3473 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
3474 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
3475 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
3476 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
3477 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3478 return fGstShwFlush;
3479}
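
/*
 * Example (illustrative only): a far branch that also switches stacks and
 * modifies RFLAGS yields a mask covering the CS and SS selector/base/limit
 * shadows, RSP and EFLAGS.
 */
#if 0
uint64_t const fFlushExample = iemNativeCImplFlagsToGuestShadowFlushMask(  IEM_CIMPL_F_BRANCH_FAR
                                                                         | IEM_CIMPL_F_BRANCH_STACK_FAR
                                                                         | IEM_CIMPL_F_RFLAGS, 0);
#endif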
3480
3481
3482/**
3483 * Emits a call to a CImpl function or something similar.
3484 */
3485static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
3486 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3487 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3488{
3489 /*
3490     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
3491 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
3492 */
3493 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
3494 fGstShwFlush
3495 | RT_BIT_64(kIemNativeGstReg_Pc)
3496 | RT_BIT_64(kIemNativeGstReg_EFlags));
3497 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3498
3499 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3500
3501 /*
3502 * Load the parameters.
3503 */
3504#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3505    /* Special-case the hidden VBOXSTRICTRC pointer. */
3506 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3507 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3508 if (cAddParams > 0)
3509 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3510 if (cAddParams > 1)
3511 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3512 if (cAddParams > 2)
3513 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3514 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3515
3516#else
3517 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3518 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3519 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3520 if (cAddParams > 0)
3521 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3522 if (cAddParams > 1)
3523 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3524 if (cAddParams > 2)
3525# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3526 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3527# else
3528 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3529# endif
3530#endif
3531
3532 /*
3533 * Make the call.
3534 */
3535 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3536
3537#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3538 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3539#endif
3540
3541 /*
3542 * Check the status code.
3543 */
3544 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3545}
3546
3547
3548/**
3549 * Emits a call to a threaded worker function.
3550 */
3551static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3552{
3553 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3554 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3555 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3556
3557#ifdef RT_ARCH_AMD64
3558 /* Load the parameters and emit the call. */
3559# ifdef RT_OS_WINDOWS
3560# ifndef VBOXSTRICTRC_STRICT_ENABLED
3561 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3562 if (cParams > 0)
3563 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3564 if (cParams > 1)
3565 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3566 if (cParams > 2)
3567 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3568# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3570 if (cParams > 0)
3571 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3572 if (cParams > 1)
3573 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3574 if (cParams > 2)
3575 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3576 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3577 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3578# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3579# else
3580 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3581 if (cParams > 0)
3582 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3583 if (cParams > 1)
3584 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3585 if (cParams > 2)
3586 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3587# endif
3588
3589 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3590
3591# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3592 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3593# endif
3594
3595#elif RT_ARCH_ARM64
3596 /*
3597 * ARM64:
3598 */
3599 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3600 if (cParams > 0)
3601 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3602 if (cParams > 1)
3603 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3604 if (cParams > 2)
3605 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3606
3607 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3608
3609#else
3610# error "port me"
3611#endif
3612
3613 /*
3614 * Check the status code.
3615 */
3616 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3617
3618 return off;
3619}
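
/*
 * The call emitted above is roughly equivalent to this C call (sketch; the
 * threaded function signature of pVCpu plus up to three uint64_t parameters
 * is inferred from the argument loading above and should be treated as an
 * assumption here).
 */
#if 0
VBOXSTRICTRC rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                                                                            pCallEntry->auParams[0],
                                                                            pCallEntry->auParams[1],
                                                                            pCallEntry->auParams[2]);
#endif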
3620
3621
3622/**
3623 * Emits the code at the RaiseGP0 label.
3624 */
3625static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3626{
3627 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3628 if (idxLabel != UINT32_MAX)
3629 {
3630 iemNativeLabelDefine(pReNative, idxLabel, off);
3631
3632 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3634#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3635 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3636#endif
3637 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3638
3639 /* jump back to the return sequence. */
3640 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3641 }
3642 return off;
3643}
3644
3645
3646/**
3647 * Emits the code at the ReturnWithFlags label (returns
3648 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3649 */
3650static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3651{
3652 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3653 if (idxLabel != UINT32_MAX)
3654 {
3655 iemNativeLabelDefine(pReNative, idxLabel, off);
3656
3657 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3658
3659 /* jump back to the return sequence. */
3660 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3661 }
3662 return off;
3663}
3664
3665
3666/**
3667 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3668 */
3669static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3670{
3671 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3672 if (idxLabel != UINT32_MAX)
3673 {
3674 iemNativeLabelDefine(pReNative, idxLabel, off);
3675
3676 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3677
3678 /* jump back to the return sequence. */
3679 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3680 }
3681 return off;
3682}
3683
3684
3685/**
3686 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3687 */
3688static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3689{
3690 /*
3691 * Generate the rc + rcPassUp fiddling code if needed.
3692 */
3693 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3694 if (idxLabel != UINT32_MAX)
3695 {
3696 iemNativeLabelDefine(pReNative, idxLabel, off);
3697
3698 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3699#ifdef RT_ARCH_AMD64
3700# ifdef RT_OS_WINDOWS
3701# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3702 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3703# endif
3704 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3705 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3706# else
3707 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3708 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3709# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3710 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3711# endif
3712# endif
3713# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3714 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3715# endif
3716
3717#else
3718 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3719 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3720 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3721#endif
3722
3723 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3724 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3725 }
3726 return off;
3727}
3728
3729
3730/**
3731 * Emits a standard epilog.
3732 */
3733static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3734{
3735 *pidxReturnLabel = UINT32_MAX;
3736
3737 /*
3738 * Successful return, so clear the return register (eax, w0).
3739 */
3740    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3741
3742 /*
3743 * Define label for common return point.
3744 */
3745 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3746 *pidxReturnLabel = idxReturn;
3747
3748 /*
3749 * Restore registers and return.
3750 */
3751#ifdef RT_ARCH_AMD64
3752 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3753
3754    /* Reposition rsp at the r15 restore point. */
3755 pbCodeBuf[off++] = X86_OP_REX_W;
3756 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3757 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3758 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3759
3760 /* Pop non-volatile registers and return */
3761 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3762 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3763 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3764 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3765 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3766 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3767 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3768 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3769# ifdef RT_OS_WINDOWS
3770 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3771 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3772# endif
3773 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3774 pbCodeBuf[off++] = 0xc9; /* leave */
3775 pbCodeBuf[off++] = 0xc3; /* ret */
3776 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3777
3778#elif RT_ARCH_ARM64
3779 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3780
3781 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3782 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3783 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3784 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3785 IEMNATIVE_FRAME_VAR_SIZE / 8);
3786 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3787 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3788 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3789 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3790 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3791 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3792 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3793 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3794 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3795 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3796 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3797 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3798
3799 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3800 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3801 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3802 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3803
3804 /* retab / ret */
3805# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3806 if (1)
3807 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3808 else
3809# endif
3810 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3811
3812#else
3813# error "port me"
3814#endif
3815 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3816
3817 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3818}
3819
3820
3821/**
3822 * Emits a standard prolog.
3823 */
3824static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3825{
3826#ifdef RT_ARCH_AMD64
3827 /*
3828 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3829 * reserving 64 bytes for stack variables plus 4 non-register argument
3830     * slots.  Fixed register assignment: xBX = pVCpu;
3831 *
3832 * Since we always do the same register spilling, we can use the same
3833 * unwind description for all the code.
3834 */
3835 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3836 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3837 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3838 pbCodeBuf[off++] = 0x8b;
3839 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3840 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3841 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3842# ifdef RT_OS_WINDOWS
3843 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3844 pbCodeBuf[off++] = 0x8b;
3845 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3846 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3847 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3848# else
3849 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3850 pbCodeBuf[off++] = 0x8b;
3851 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3852# endif
3853 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3854 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3855 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3856 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3857 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3858 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3859 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3860 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3861
3862 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3863 X86_GREG_xSP,
3864 IEMNATIVE_FRAME_ALIGN_SIZE
3865 + IEMNATIVE_FRAME_VAR_SIZE
3866 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3867 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3868 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3869 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3870 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3871
3872#elif RT_ARCH_ARM64
3873 /*
3874 * We set up a stack frame exactly like on x86, only we have to push the
3875     * return address ourselves here.  We save all non-volatile registers.
3876 */
3877 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3878
3879# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been
3880                      * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.  It's
3881                      * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
3882                      * in any way conditional, so we just emit this instruction now and hope for the best... */
3883 /* pacibsp */
3884 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3885# endif
3886
3887 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3888 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3889 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3890 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3891 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3892 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3893 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3894 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3895 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3896 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3897 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3898 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3899 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3900 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3901 /* Save the BP and LR (ret address) registers at the top of the frame. */
3902 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3903 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3904 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3905 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3906 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3907 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3908
3909 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3910 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3911
3912 /* mov r28, r0 */
3913 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3914 /* mov r27, r1 */
3915 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3916
3917#else
3918# error "port me"
3919#endif
3920 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3921 return off;
3922}
3923
3924
3925
3926
3927/*********************************************************************************************************************************
3928* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
3929*********************************************************************************************************************************/
3930
3931#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3932 { \
3933 pReNative->fMc = (a_fMcFlags); \
3934 pReNative->fCImpl = (a_fCImplFlags); \
3935 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
3936
3937/** We have to get to the end in recompilation mode, as otherwise we won't
3938 * generate code for all the IEM_MC_IF_XXX branches. */
3939#define IEM_MC_END() \
3940 } return off
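
/*
 * Illustrative sketch (hypothetical function, simplified): the rough shape of
 * a generated recompiler body using these macros.  The real emitters are
 * generated from the instruction specifications and their exact signature may
 * differ, so treat the surrounding function shape as an assumption.
 */
#if 0
static uint32_t iemNativeExampleReCompFunc(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
{
    IEM_MC_BEGIN(0, 0, 0, 0);
    /* ... recompiled IEM_MC_XXX statements go here ... */
    IEM_MC_END();
}
#endif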
3941
3942
3943
3944/*********************************************************************************************************************************
3945*   Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                              *
3946*********************************************************************************************************************************/
3947
3948#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
3949 pReNative->fMc = 0; \
3950 pReNative->fCImpl = (a_fFlags); \
3951 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3952
3953
3954#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3955 pReNative->fMc = 0; \
3956 pReNative->fCImpl = (a_fFlags); \
3957 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3958
3959DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3960 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3961 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3962{
3963 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3964}
3965
3966
3967#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3968 pReNative->fMc = 0; \
3969 pReNative->fCImpl = (a_fFlags); \
3970 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
3971 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3972
3973DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3974 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3975 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3976{
3977 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3978}
3979
3980
3981#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3982 pReNative->fMc = 0; \
3983 pReNative->fCImpl = (a_fFlags); \
3984 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
3985 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3986
3987DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3988 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3989 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
3990 uint64_t uArg2)
3991{
3992 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3993}
3994
3995
3996
3997/*********************************************************************************************************************************
3998* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
3999*********************************************************************************************************************************/
4000
4001/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
4002 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
4003DECL_INLINE_THROW(uint32_t)
4004iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4005{
4006 /*
4007     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
4008 * return with special status code and make the execution loop deal with
4009 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
4010 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
4011 * could continue w/o interruption, it probably will drop into the
4012     * debugger, so it's not worth the effort of trying to service it here and we
4013 * just lump it in with the handling of the others.
4014 *
4015 * To simplify the code and the register state management even more (wrt
4016     * immediate in the AND operation), we always update the flags and skip the
4017     * conditional jump associated with the extra check.
4018 */
4019 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
4020 <= UINT32_MAX);
4021 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4022 kIemNativeGstRegUse_ForUpdate);
4023 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
4024 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
4025 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
4026 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
4027 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4028
4029 /* Free but don't flush the EFLAGS register. */
4030 iemNativeRegFreeTmp(pReNative, idxEflReg);
4031
4032 return off;
4033}
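
/*
 * C-level equivalent of the check emitted above (illustrative sketch only):
 * returns the updated EFLAGS value and tells the caller whether the generated
 * code would take the ReturnWithFlags path.
 */
#if 0
static uint32_t iemNativeExampleFinishClearingRF(uint32_t fEFlags, bool *pfReturnWithFlags)
{
    *pfReturnWithFlags = RT_BOOL(fEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK));
    return fEFlags & ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
}
#endif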
4034
4035
4036#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4037 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4038
4039#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4040 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4041 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4042
4043/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4044DECL_INLINE_THROW(uint32_t)
4045iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4046{
4047 /* Allocate a temporary PC register. */
4048 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4049
4050 /* Perform the addition and store the result. */
4051 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4052 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4053
4054 /* Free but don't flush the PC register. */
4055 iemNativeRegFreeTmp(pReNative, idxPcReg);
4056
4057 return off;
4058}
4059
4060
4061#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4062 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4063
4064#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4065 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4066 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4067
4068/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4069DECL_INLINE_THROW(uint32_t)
4070iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4071{
4072 /* Allocate a temporary PC register. */
4073 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4074
4075 /* Perform the addition and store the result. */
4076 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4077 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4078
4079 /* Free but don't flush the PC register. */
4080 iemNativeRegFreeTmp(pReNative, idxPcReg);
4081
4082 return off;
4083}
4084
4085
4086#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4087 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4088
4089#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4090 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4091 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4092
4093/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4094DECL_INLINE_THROW(uint32_t)
4095iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4096{
4097 /* Allocate a temporary PC register. */
4098 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4099
4100 /* Perform the addition and store the result. */
4101 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4102 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4103 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4104
4105 /* Free but don't flush the PC register. */
4106 iemNativeRegFreeTmp(pReNative, idxPcReg);
4107
4108 return off;
4109}
4110
4111
4112
4113/*********************************************************************************************************************************
4114* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4115*********************************************************************************************************************************/
4116
4117#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4118 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4119 (a_enmEffOpSize), pCallEntry->idxInstr)
4120
4121#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4122 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4123 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4124
4125#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4126 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4127 IEMMODE_16BIT, pCallEntry->idxInstr)
4128
4129#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4130 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4131 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4132
4133#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4134 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4135 IEMMODE_64BIT, pCallEntry->idxInstr)
4136
4137#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4138 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4139 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4140
4141/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4142 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4143 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4144DECL_INLINE_THROW(uint32_t)
4145iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4146 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4147{
4148 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4149
4150 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4151 off = iemNativeRegFlushPendingWrites(pReNative, off);
4152
4153 /* Allocate a temporary PC register. */
4154 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4155
4156 /* Perform the addition. */
4157 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4158
4159 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4160 {
4161 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4162 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4163 }
4164 else
4165 {
4166 /* Just truncate the result to 16-bit IP. */
4167 Assert(enmEffOpSize == IEMMODE_16BIT);
4168 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4169 }
4170 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4171
4172 /* Free but don't flush the PC register. */
4173 iemNativeRegFreeTmp(pReNative, idxPcReg);
4174
4175 return off;
4176}
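
/*
 * C-level equivalent of the emitted code (illustrative sketch; compare with
 * iemRegRip64RelativeJumpS8AndFinishNoFlags and friends).  The interpreter
 * helpers IEM_IS_CANONICAL and iemRaiseGeneralProtectionFault0 are used here
 * for readability only; the generated code raises #GP(0) via the RaiseGp0
 * label instead.
 */
#if 0
uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
if (enmEffOpSize == IEMMODE_16BIT)
    uNewRip &= UINT16_MAX;                              /* truncate to 16-bit IP */
else if (!IEM_IS_CANONICAL(uNewRip))
    return iemRaiseGeneralProtectionFault0(pVCpu);      /* non-canonical target */
pVCpu->cpum.GstCtx.rip = uNewRip;
#endif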
4177
4178
4179#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4180 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4181 (a_enmEffOpSize), pCallEntry->idxInstr)
4182
4183#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4184 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4185 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4186
4187#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4188 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4189 IEMMODE_16BIT, pCallEntry->idxInstr)
4190
4191#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4192 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4193 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4194
4195#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4196 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4197 IEMMODE_32BIT, pCallEntry->idxInstr)
4198
4199#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4200 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4201 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4202
4203/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4204 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4205 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4206DECL_INLINE_THROW(uint32_t)
4207iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4208 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4209{
4210 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4211
4212 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4213 off = iemNativeRegFlushPendingWrites(pReNative, off);
4214
4215 /* Allocate a temporary PC register. */
4216 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4217
4218 /* Perform the addition. */
4219 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4220
4221 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4222 if (enmEffOpSize == IEMMODE_16BIT)
4223 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4224
4225 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4226 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4227
4228 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4229
4230 /* Free but don't flush the PC register. */
4231 iemNativeRegFreeTmp(pReNative, idxPcReg);
4232
4233 return off;
4234}
4235
4236
4237#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4238 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4239
4240#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4241 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4242 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4243
4244#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4245 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4246
4247#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4248 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4249 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4250
4251#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4252 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4253
4254#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4255 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4256 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4257
4258/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4259DECL_INLINE_THROW(uint32_t)
4260iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4261 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4262{
4263 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4264 off = iemNativeRegFlushPendingWrites(pReNative, off);
4265
4266 /* Allocate a temporary PC register. */
4267 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4268
4269 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4270 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4271 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4272 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4273 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4274
4275 /* Free but don't flush the PC register. */
4276 iemNativeRegFreeTmp(pReNative, idxPcReg);
4277
4278 return off;
4279}
4280
4281
4282
4283/*********************************************************************************************************************************
4284* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4285*********************************************************************************************************************************/
4286
4287/**
4288 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4289 *
4290 * @returns Pointer to the condition stack entry on success, NULL on failure
4291 * (too many nestings)
4292 */
4293DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4294{
4295 uint32_t const idxStack = pReNative->cCondDepth;
4296 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4297
4298 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4299 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4300
4301 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4302 pEntry->fInElse = false;
4303 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4304 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4305
4306 return pEntry;
4307}
4308
4309
4310/**
4311 * Start of the if-block, snapshotting the register and variable state.
4312 */
4313DECL_INLINE_THROW(void)
4314iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4315{
4316 Assert(offIfBlock != UINT32_MAX);
4317 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4318 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4319 Assert(!pEntry->fInElse);
4320
4321 /* Define the start of the IF block if requested or for disassembly purposes. */
4322 if (idxLabelIf != UINT32_MAX)
4323 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4324#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4325 else
4326 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4327#else
4328 RT_NOREF(offIfBlock);
4329#endif
4330
4331 /* Copy the initial state so we can restore it in the 'else' block. */
4332 pEntry->InitialState = pReNative->Core;
4333}
4334
4335
4336#define IEM_MC_ELSE() } while (0); \
4337 off = iemNativeEmitElse(pReNative, off); \
4338 do {
4339
4340/** Emits code related to IEM_MC_ELSE. */
4341DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4342{
4343 /* Check sanity and get the conditional stack entry. */
4344 Assert(off != UINT32_MAX);
4345 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4346 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4347 Assert(!pEntry->fInElse);
4348
4349 /* Jump to the endif */
4350 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4351
4352 /* Define the else label and enter the else part of the condition. */
4353 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4354 pEntry->fInElse = true;
4355
4356 /* Snapshot the core state so we can do a merge at the endif and restore
4357 the snapshot we took at the start of the if-block. */
4358 pEntry->IfFinalState = pReNative->Core;
4359 pReNative->Core = pEntry->InitialState;
4360
4361 return off;
4362}
4363
4364
4365#define IEM_MC_ENDIF() } while (0); \
4366 off = iemNativeEmitEndIf(pReNative, off)
4367
4368/** Emits code related to IEM_MC_ENDIF. */
4369DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4370{
4371 /* Check sanity and get the conditional stack entry. */
4372 Assert(off != UINT32_MAX);
4373 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4374 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4375
4376 /*
4377 * Now we have to find common ground with the core state at the end of the
4378 * if-block. Use the smallest common denominator and just drop anything
4379 * that isn't the same in both states.
4380 */
4381 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4382 * which is why we're doing this at the end of the else-block.
4383 * But we'd need more info about the future for that to be worth the effort. */
4384 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4385 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4386 {
4387 /* shadow guest stuff first. */
4388 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4389 if (fGstRegs)
4390 {
4391 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4392 do
4393 {
4394 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4395 fGstRegs &= ~RT_BIT_64(idxGstReg);
4396
4397 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4398 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4399 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4400 {
4401 Log12(("iemNativeEmitEndIf: dropping gst %#RX64 from hst %s\n",
4402 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4403 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4404 }
4405 } while (fGstRegs);
4406 }
4407 else
4408 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4409
4410 /* Check variables next. For now we must require them to be identical
4411 or stuff we can recreate. */
4412 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4413 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4414 if (fVars)
4415 {
4416 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4417 do
4418 {
4419 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4420 fVars &= ~RT_BIT_32(idxVar);
4421
4422 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4423 {
4424 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4425 continue;
4426 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4427 {
4428 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4429 if (idxHstReg != UINT8_MAX)
4430 {
4431 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4432 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4433 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4434 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4435 }
4436 continue;
4437 }
4438 }
4439 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4440 continue;
4441
4442 /* Irreconcilable, so drop it. */
4443 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4444 if (idxHstReg != UINT8_MAX)
4445 {
4446 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4447 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4448 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4449 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4450 }
4451 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4452 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4453 } while (fVars);
4454 }
4455
4456 /* Finally, check that the host register allocations matches. */
4457 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4458 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4459 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4460 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4461 }
4462
4463 /*
4464 * Define the endif label and maybe the else one if we're still in the 'if' part.
4465 */
4466 if (!pEntry->fInElse)
4467 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4468 else
4469 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4470 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4471
4472 /* Pop the conditional stack. */
4473 pReNative->cCondDepth -= 1;
4474
4475 return off;
4476}
4477
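/*
 * Illustrative sketch of how the three pieces above fit together; the shape
 * below mirrors the IEM_MC conditional syntax (the IEM_MC_IF_EFL_XXX variants
 * are defined further down in this file):
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ... statements recompiled for the taken path ...
 *      } IEM_MC_ELSE() {
 *          ... statements recompiled for the other path ...
 *      } IEM_MC_ENDIF();
 *
 * The IF macro pushes a condition stack entry and emits the test plus a jump
 * to the else label; IEM_MC_ELSE emits a jump to the endif label, defines the
 * else label and restores the core state snapshot; IEM_MC_ENDIF reconciles the
 * two core states (dropping mismatched shadows and variable registers) and
 * defines the endif label.
 */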
4478
4479#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4480 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4481 do {
4482
4483/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4484DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4485{
4486 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4487
4488 /* Get the eflags. */
4489 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4490 kIemNativeGstRegUse_ReadOnly);
4491
4492 /* Test and jump. */
4493 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4494
4495 /* Free but don't flush the EFlags register. */
4496 iemNativeRegFreeTmp(pReNative, idxEflReg);
4497
4498 /* Make a copy of the core state now as we start the if-block. */
4499 iemNativeCondStartIfBlock(pReNative, off);
4500
4501 return off;
4502}
4503
4504
4505#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4506 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4507 do {
4508
4509/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4510DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4511{
4512 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4513
4514 /* Get the eflags. */
4515 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4516 kIemNativeGstRegUse_ReadOnly);
4517
4518 /* Test and jump. */
4519 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4520
4521 /* Free but don't flush the EFlags register. */
4522 iemNativeRegFreeTmp(pReNative, idxEflReg);
4523
4524 /* Make a copy of the core state now as we start the if-block. */
4525 iemNativeCondStartIfBlock(pReNative, off);
4526
4527 return off;
4528}
4529
4530
4531#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4532 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4533 do {
4534
4535/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4536DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4537{
4538 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4539
4540 /* Get the eflags. */
4541 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4542 kIemNativeGstRegUse_ReadOnly);
4543
4544 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4545 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4546
4547 /* Test and jump. */
4548 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4549
4550 /* Free but don't flush the EFlags register. */
4551 iemNativeRegFreeTmp(pReNative, idxEflReg);
4552
4553 /* Make a copy of the core state now as we start the if-block. */
4554 iemNativeCondStartIfBlock(pReNative, off);
4555
4556 return off;
4557}
4558
4559
4560#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4561 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4562 do {
4563
4564/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4565DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4566{
4567 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4568
4569 /* Get the eflags. */
4570 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4571 kIemNativeGstRegUse_ReadOnly);
4572
4573 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4574 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4575
4576 /* Test and jump. */
4577 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4578
4579 /* Free but don't flush the EFlags register. */
4580 iemNativeRegFreeTmp(pReNative, idxEflReg);
4581
4582 /* Make a copy of the core state now as we start the if-block. */
4583 iemNativeCondStartIfBlock(pReNative, off);
4584
4585 return off;
4586}
4587
4588
4589#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4590 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4591 do {
4592
4593#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4594 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4595 do {
4596
4597/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4598DECL_INLINE_THROW(uint32_t)
4599iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4600 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4601{
4602 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4603
4604 /* Get the eflags. */
4605 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4606 kIemNativeGstRegUse_ReadOnly);
4607
4608 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4609 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4610
4611 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4612 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4613 Assert(iBitNo1 != iBitNo2);
4614
4615#ifdef RT_ARCH_AMD64
4616 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4617
4618 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4619 if (iBitNo1 > iBitNo2)
4620 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4621 else
4622 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4623 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4624
4625#elif defined(RT_ARCH_ARM64)
4626 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4627 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4628
4629 /* and tmpreg, eflreg, #1<<iBitNo1 */
4630 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4631
4632 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4633 if (iBitNo1 > iBitNo2)
4634 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4635 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4636 else
4637 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4638 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4639
4640 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4641
4642#else
4643# error "Port me"
4644#endif
4645
4646 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4647 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4648 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4649
4650 /* Free but don't flush the EFlags and tmp registers. */
4651 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4652 iemNativeRegFreeTmp(pReNative, idxEflReg);
4653
4654 /* Make a copy of the core state now as we start the if-block. */
4655 iemNativeCondStartIfBlock(pReNative, off);
4656
4657 return off;
4658}
4659
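/*
 * Worked example for the bit-compare trick above (using SF at bit 7 and OF at
 * bit 11, as a Jl/Jge style condition would): the AND isolates SF in tmpreg,
 * the left shift by 11 - 7 = 4 lines it up with OF, and the XOR against the
 * live EFLAGS leaves bit 11 of tmpreg set exactly when SF != OF. The final
 * bit-test on iBitNo2 therefore branches to the else label for the EQ variant
 * when the bits differ, and for the NE (fInverted) variant when they match.
 */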
4660
4661#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4662 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4663 do {
4664
4665#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4666 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4667 do {
4668
4669/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4670 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4671DECL_INLINE_THROW(uint32_t)
4672iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4673 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4674{
4675 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4676
4677 /* We need an if-block label for the non-inverted variant. */
4678 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4679 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4680
4681 /* Get the eflags. */
4682 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4683 kIemNativeGstRegUse_ReadOnly);
4684
4685 /* Translate the flag masks to bit numbers. */
4686 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4687 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4688
4689 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4690 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4691 Assert(iBitNo1 != iBitNo);
4692
4693 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4694 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4695 Assert(iBitNo2 != iBitNo);
4696 Assert(iBitNo2 != iBitNo1);
4697
4698#ifdef RT_ARCH_AMD64
4699 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4700#elif defined(RT_ARCH_ARM64)
4701 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4702#endif
4703
4704 /* Check for the lone bit first. */
4705 if (!fInverted)
4706 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4707 else
4708 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4709
4710 /* Then extract and compare the other two bits. */
4711#ifdef RT_ARCH_AMD64
4712 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4713 if (iBitNo1 > iBitNo2)
4714 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4715 else
4716 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4717 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4718
4719#elif defined(RT_ARCH_ARM64)
4720 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4721
4722 /* and tmpreg, eflreg, #1<<iBitNo1 */
4723 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4724
4725 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4726 if (iBitNo1 > iBitNo2)
4727 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4728 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4729 else
4730 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4731 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4732
4733 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4734
4735#else
4736# error "Port me"
4737#endif
4738
4739 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4740 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4741 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4742
4743 /* Free but don't flush the EFlags and tmp registers. */
4744 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4745 iemNativeRegFreeTmp(pReNative, idxEflReg);
4746
4747 /* Make a copy of the core state now as we start the if-block. */
4748 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4749
4750 return off;
4751}
4752
4753
4754#define IEM_MC_IF_CX_IS_NZ() \
4755 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4756 do {
4757
4758/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4759DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4760{
4761 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4762
4763 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4764 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4765 kIemNativeGstRegUse_ReadOnly);
4766 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4767 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4768
4769 iemNativeCondStartIfBlock(pReNative, off);
4770 return off;
4771}
4772
4773
4774#define IEM_MC_IF_ECX_IS_NZ() \
4775 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4776 do {
4777
4778#define IEM_MC_IF_RCX_IS_NZ() \
4779 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4780 do {
4781
4782/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4783DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4784{
4785 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4786
4787 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4788 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4789 kIemNativeGstRegUse_ReadOnly);
4790 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4791 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4792
4793 iemNativeCondStartIfBlock(pReNative, off);
4794 return off;
4795}
4796
4797
4798#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4799 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4800 do {
4801
4802#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4803 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4804 do {
4805
4806/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4807DECL_INLINE_THROW(uint32_t)
4808iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4809{
4810 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4811
4812 /* We have to load both RCX and EFLAGS before we can start branching,
4813 otherwise we'll end up in the else-block with an inconsistent
4814 register allocator state.
4815 Doing EFLAGS first as it's more likely to be loaded, right? */
4816 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4817 kIemNativeGstRegUse_ReadOnly);
4818 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4819 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4820 kIemNativeGstRegUse_ReadOnly);
4821
4822 /** @todo we could reduce this to a single branch instruction by spending a
4823 * temporary register and some setnz stuff. Not sure if loops are
4824 * worth it. */
4825 /* Check CX. */
4826 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4827
4828 /* Check the EFlags bit. */
4829 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4830 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4831 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4832 !fCheckIfSet /*fJmpIfSet*/);
4833
4834 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4835 iemNativeRegFreeTmp(pReNative, idxEflReg);
4836
4837 iemNativeCondStartIfBlock(pReNative, off);
4838 return off;
4839}
4840
4841
4842#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4843 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
4844 do {
4845
4846#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4847 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
4848 do {
4849
4850#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4851 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
4852 do {
4853
4854#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4855 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
4856 do {
4857
4858/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
4859 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
4860 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
4861 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4862DECL_INLINE_THROW(uint32_t)
4863iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4864 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
4865{
4866 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4867
4868 /* We have to load both RCX and EFLAGS before we can start branching,
4869 otherwise we'll end up in the else-block with an inconsistent
4870 register allocator state.
4871 Doing EFLAGS first as it's more likely to be loaded, right? */
4872 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4873 kIemNativeGstRegUse_ReadOnly);
4874 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4875 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4876 kIemNativeGstRegUse_ReadOnly);
4877
4878 /** @todo we could reduce this to a single branch instruction by spending a
4879 * temporary register and some setnz stuff. Not sure if loops are
4880 * worth it. */
4881 /* Check RCX/ECX. */
4882 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4883
4884 /* Check the EFlags bit. */
4885 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4886 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4887 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4888 !fCheckIfSet /*fJmpIfSet*/);
4889
4890 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4891 iemNativeRegFreeTmp(pReNative, idxEflReg);
4892
4893 iemNativeCondStartIfBlock(pReNative, off);
4894 return off;
4895}
4896
4897
4898
4899/*********************************************************************************************************************************
4900* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
4901*********************************************************************************************************************************/
4902/** Number of hidden arguments for CIMPL calls.
4903 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
4904#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4905# define IEM_CIMPL_HIDDEN_ARGS 3
4906#else
4907# define IEM_CIMPL_HIDDEN_ARGS 2
4908#endif
4909
4910#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
4911 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
4912
4913#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
4914 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
4915
4916#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
4917 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
4918
4919#define IEM_MC_LOCAL(a_Type, a_Name) \
4920 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
4921
4922#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
4923 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
4924
4925
4926/**
4927 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
4928 */
4929DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
4930{
4931 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
4932 return IEM_CIMPL_HIDDEN_ARGS;
4933 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
4934 return 1;
4935 return 0;
4936}
4937
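/*
 * Illustrative sketch (identifiers invented for the example) of how the
 * wrappers above are used by a recompiled MC block:
 *
 *      IEM_MC_ARG(uint16_t, u16Port, 0);
 *      IEM_MC_ARG_CONST(uint8_t, cbReg, 2, 1);
 *      IEM_MC_LOCAL(uint32_t, u32Value);
 *
 * (The IEM_MC_ARG_CONST operands here are the constant value 2 and argument
 * number 1.) Each statement yields a uint8_t variable index from the
 * allocator below. For CIMPL calls the visible argument numbers are biased by
 * IEM_CIMPL_HIDDEN_ARGS (pVCpu and cbInstr, plus the VBOXSTRICTRC return
 * buffer on Windows/AMD64), so visible argument 0 ends up as internal call
 * argument 2 or 3.
 */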
4938
4939/**
4940 * Internal work that allocates a variable with kind set to
4941 * kIemNativeVarKind_Invalid and no current stack allocation.
4942 *
4943 * The kind will either be set by the caller or later when the variable is first
4944 * assigned a value.
4945 */
4946static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
4947{
4948 Assert(cbType > 0 && cbType <= 64);
4949 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
4950 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
4951 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
4952 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
4953 pReNative->Core.aVars[idxVar].cbVar = cbType;
4954 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
4955 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4956 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
4957 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
4958 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
4959 pReNative->Core.aVars[idxVar].u.uValue = 0;
4960 return idxVar;
4961}
4962
4963
4964/**
4965 * Internal work that allocates an argument variable w/o setting enmKind.
4966 */
4967static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
4968{
4969 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
4970 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
4971 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
4972
4973 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
4974 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
4975 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
4976 return idxVar;
4977}
4978
4979
4980/**
4981 * Changes the variable to a stack variable.
4982 *
4983 * Currently this is only possible to do the first time the variable is used;
4984 * switching later could be implemented but isn't done.
4985 *
4986 * @param pReNative The recompiler state.
4987 * @param idxVar The variable.
4988 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
4989 */
4990static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4991{
4992 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
4993 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4994 {
4995 /* We could in theory transition from immediate to stack as well, but it
4996 would involve the caller doing work storing the value on the stack. So,
4997 till that's required we only allow transition from invalid. */
4998 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
4999 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5000 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
5001
5002 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
5003 {
5004 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
5005 {
5006 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
5007 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5008 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
5009 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5010 return;
5011 }
5012 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7; */
5013 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
5014 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
5015 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
5016 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
5017 uint32_t bmStack = ~pReNative->Core.bmStack;
5018 while (bmStack != UINT32_MAX)
5019 {
5020 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
5021 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5022 if (!(iSlot & fBitAlignMask))
5023 {
5024 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
5025 {
5026 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
5027 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5028 return;
5029 }
5030 }
5031 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
5032 }
5033 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5034 }
5035 }
5036}
5037
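/*
 * Worked example for the multi-slot path above (sizes in bytes, slots being
 * 8 bytes each): for a 32 byte variable, ASMBitLastSetU32(32) is 6, so
 * fBitAlignMask = RT_BIT_32(6 - 4) - 1 = 3, i.e. the first slot index must be
 * a multiple of 4; fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, i.e.
 * four consecutive slots get claimed. The loop then scans the inverted
 * bmStack bitmap for an aligned run of four free slots.
 */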
5038
5039/**
5040 * Changes it to a variable with a constant value.
5041 *
5042 * This does not require stack storage as we know the value and can always
5043 * reload it, unless of course it's referenced.
5044 *
5045 * @param pReNative The recompiler state.
5046 * @param idxVar The variable.
5047 * @param uValue The immediate value.
5048 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5049 */
5050static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5051{
5052 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5053 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5054 {
5055 /* Only simple transitions for now. */
5056 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5057 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5058 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5059 }
5060 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5061}
5062
5063
5064/**
5065 * Changes the variable to a reference (pointer) to @a idxOtherVar.
5066 *
5067 * @param pReNative The recompiler state.
5068 * @param idxVar The variable.
5069 * @param idxOtherVar The variable to take the (stack) address of.
5070 *
5071 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5072 */
5073static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5074{
5075 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5076 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5077
5078 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5079 {
5080 /* Only simple transitions for now. */
5081 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5082 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5083 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5084 }
5085 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5086
5087 /* Update the other variable, ensure it's a stack variable. */
5088 /** @todo handle variables with const values... that'll go boom now. */
5089 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5090 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5091}
5092
5093
5094DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5095{
5096 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5097}
5098
5099
5100DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5101{
5102 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5103 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5104 return idxVar;
5105}
5106
5107
5108DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5109{
5110 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5111 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5112 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5113 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5114
5115 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5116 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5117 return idxArgVar;
5118}
5119
5120
5121DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5122{
5123 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5124 iemNativeVarSetKindToStack(pReNative, idxVar);
5125 return idxVar;
5126}
5127
5128
5129DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5130{
5131 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5132 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5133 return idxVar;
5134}
5135
5136
5137/**
5138 * Makes sure variable @a idxVar has a register assigned to it.
5139 *
5140 * @returns The host register number.
5141 * @param pReNative The recompiler state.
5142 * @param idxVar The variable.
5143 * @param poff Pointer to the instruction buffer offset.
5144 * In case a register needs to be freed up.
5145 */
5146DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5147{
5148 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5149
5150 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5151 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5152 return idxReg;
5153
5154 /*
5155 * We have to allocate a register for the variable, even if it's a stack one,
5156 * as we don't know whether modifications are being made to it before it's
5157 * finalized (todo: analyze and insert hints about that?).
5158 *
5159 * If we can, we try to get the correct register for argument variables. This
5160 * assumes that most argument variables are fetched as close as possible
5161 * to the actual call, so that there aren't any interfering hidden calls
5162 * (memory accesses, etc) in between.
5163 *
5164 * If we cannot, or it's a regular (non-argument) variable, we make sure no
5165 * argument registers that will be used by this MC block are allocated here,
5166 * and we always prefer non-volatile registers to avoid needing to spill
5167 * stuff for an internal call.
5168 */
5169 /** @todo Detect too early argument value fetches and warn about hidden
5170 * calls causing less optimal code to be generated in the python script. */
5171
5172 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5173 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5174 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5175 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5176 else
5177 {
5178 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5179 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5180 & ~pReNative->Core.bmHstRegsWithGstShadow
5181 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5182 & fNotArgsMask;
5183 if (fRegs)
5184 {
5185 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
5186 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5187 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5188 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5189 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5190 }
5191 else
5192 {
5193 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5194 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5195 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5196 }
5197 }
5198 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5199 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5200 return idxReg;
5201}
5202
5203
5204
5205/*********************************************************************************************************************************
5206* Emitters for IEM_MC_CALL_CIMPL_XXX *
5207*********************************************************************************************************************************/
5208
5209/**
5210 * Emits code to load a reference to the given guest register into @a idxGprDst.
5211 */
5212DECL_INLINE_THROW(uint32_t)
5213iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5214 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5215{
5216 /*
5217 * Get the offset relative to the CPUMCTX structure.
5218 */
5219 uint32_t offCpumCtx;
5220 switch (enmClass)
5221 {
5222 case kIemNativeGstRegRef_Gpr:
5223 Assert(idxRegInClass < 16);
5224 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5225 break;
5226
5227 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
5228 Assert(idxRegInClass < 4);
5229 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5230 break;
5231
5232 case kIemNativeGstRegRef_EFlags:
5233 Assert(idxRegInClass == 0);
5234 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5235 break;
5236
5237 case kIemNativeGstRegRef_MxCsr:
5238 Assert(idxRegInClass == 0);
5239 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5240 break;
5241
5242 case kIemNativeGstRegRef_FpuReg:
5243 Assert(idxRegInClass < 8);
5244 AssertFailed(); /** @todo what kind of indexing? */
5245 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5246 break;
5247
5248 case kIemNativeGstRegRef_MReg:
5249 Assert(idxRegInClass < 8);
5250 AssertFailed(); /** @todo what kind of indexing? */
5251 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5252 break;
5253
5254 case kIemNativeGstRegRef_XReg:
5255 Assert(idxRegInClass < 16);
5256 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5257 break;
5258
5259 default:
5260 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5261 }
5262
5263 /*
5264 * Load the value into the destination register.
5265 */
5266#ifdef RT_ARCH_AMD64
5267 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5268
5269#elif defined(RT_ARCH_ARM64)
5270 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5271 Assert(offCpumCtx < 4096);
5272 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5273
5274#else
5275# error "Port me!"
5276#endif
5277
5278 return off;
5279}
5280
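/*
 * Example: for kIemNativeGstRegRef_Gpr with idxRegInClass = X86_GREG_xBX (3)
 * the emitted LEA (AMD64) or ADD (ARM64) above produces the address of
 * pVCpu->cpum.GstCtx.aGRegs[3], i.e. a pointer to the guest RBX value - the
 * sort of pointer that by-reference arguments (IEM_MC_REF_GREG_XXX style)
 * are expected to carry.
 */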
5281
5282/**
5283 * Common code for CIMPL and AIMPL calls.
5284 *
5285 * These are calls that uses argument variables and such. They should not be
5286 * confused with internal calls required to implement an MC operation,
5287 * like a TLB load and similar.
5288 *
5289 * Upon return all that is left to do is to load any hidden arguments and
5290 * perform the call. All argument variables are freed.
5291 *
5292 * @returns New code buffer offset; throws VBox status code on error.
5293 * @param pReNative The native recompile state.
5294 * @param off The code buffer offset.
5295 * @param cArgs The total number of arguments (includes hidden
5296 * count).
5297 * @param cHiddenArgs The number of hidden arguments. The hidden
5298 * arguments must not have any variable declared for
5299 * them, whereas all the regular arguments must
5300 * (tstIEMCheckMc ensures this).
5301 */
5302DECL_HIDDEN_THROW(uint32_t)
5303iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5304{
5305#ifdef VBOX_STRICT
5306 /*
5307 * Assert sanity.
5308 */
5309 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5310 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5311 for (unsigned i = 0; i < cHiddenArgs; i++)
5312 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5313 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5314 {
5315 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5316 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5317 }
5318#endif
5319
5320 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
5321
5322 /*
5323 * First, go over the host registers that will be used for arguments and make
5324 * sure they either hold the desired argument or are free.
5325 */
5326 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
5327 for (uint32_t i = 0; i < cRegArgs; i++)
5328 {
5329 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5330 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5331 {
5332 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
5333 {
5334 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
5335 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5336 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
5337 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5338 if (uArgNo == i)
5339 { /* perfect */ }
5340 else
5341 {
5342 /* The variable allocator logic should make sure this is impossible. */
5343 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
5344
5345 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5346 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5347 else
5348 {
5349 /* just free it, can be reloaded if used again */
5350 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5351 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
5352 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
5353 }
5354 }
5355 }
5356 else
5357 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
5358 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
5359 }
5360 }
5361
5362 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
5363
5364 /*
5365 * Make sure the argument variables are loaded into their respective registers.
5366 *
5367 * We can optimize this by ASSUMING that any register allocations are for
5368 * registers that have already been loaded and are ready. The previous step
5369 * saw to that.
5370 */
5371 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
5372 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5373 {
5374 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5375 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5376 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
5377 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
5378 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
5379 else
5380 {
5381 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5382 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5383 {
5384 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5385 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
5386 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
5387 | RT_BIT_32(idxArgReg);
5388 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
5389 }
5390 else
5391 {
5392 /* Use ARG0 as temp for stuff we need registers for. */
5393 switch (pReNative->Core.aVars[idxVar].enmKind)
5394 {
5395 case kIemNativeVarKind_Stack:
5396 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5397 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5398 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg,
5399 IEMNATIVE_FP_OFF_STACK_VARS
5400 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5401 continue;
5402
5403 case kIemNativeVarKind_Immediate:
5404 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
5405 continue;
5406
5407 case kIemNativeVarKind_VarRef:
5408 {
5409 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5410 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5411 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5412 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5413 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
5414 IEMNATIVE_FP_OFF_STACK_VARS
5415 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5416 continue;
5417 }
5418
5419 case kIemNativeVarKind_GstRegRef:
5420 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
5421 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5422 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5423 continue;
5424
5425 case kIemNativeVarKind_Invalid:
5426 case kIemNativeVarKind_End:
5427 break;
5428 }
5429 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5430 }
5431 }
5432 }
5433#ifdef VBOX_STRICT
5434 else
5435 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5436 {
5437 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
5438 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
5439 }
5440#endif
5441
5442#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
5443 /*
5444 * If there are any stack arguments, make sure they are in their place as well.
5445 *
5446 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since the
5447 * caller will load it later and it must be free (see first loop).
5448 */
5449 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
5450 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
5451 {
5452 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5453 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
5454 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5455 {
5456 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
5457 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
5458 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
5459 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5460 }
5461 else
5462 {
5463 /* Use ARG0 as temp for stuff we need registers for. */
5464 switch (pReNative->Core.aVars[idxVar].enmKind)
5465 {
5466 case kIemNativeVarKind_Stack:
5467 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5468 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5469 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
5470 IEMNATIVE_FP_OFF_STACK_VARS
5471 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5472 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5473 continue;
5474
5475 case kIemNativeVarKind_Immediate:
5476 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
5477 continue;
5478
5479 case kIemNativeVarKind_VarRef:
5480 {
5481 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5482 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5483 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5484 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5485 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5486 IEMNATIVE_FP_OFF_STACK_VARS
5487 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5488 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5489 continue;
5490 }
5491
5492 case kIemNativeVarKind_GstRegRef:
5493 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5494 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5495 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5496 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5497 continue;
5498
5499 case kIemNativeVarKind_Invalid:
5500 case kIemNativeVarKind_End:
5501 break;
5502 }
5503 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5504 }
5505 }
5506#else
5507 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
5508#endif
5509
5510 /*
5511 * Free all argument variables (simplified).
5512 * Their lifetime always expires with the call they are for.
5513 */
5514 /** @todo Make the python script check that arguments aren't used after
5515 * IEM_MC_CALL_XXXX. */
5516 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
5517 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
5518 * an argument value. */
5519 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
5520 {
5521 uint8_t idxVar = pReNative->Core.aidxArgVars[i];
5522 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5523 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
5524 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5525 }
5526 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5527
5528 /*
5529 * Flush volatile registers as we make the call.
5530 */
5531 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
5532
5533 return off;
5534}
5535
5536
5537/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
5538DECL_HIDDEN_THROW(uint32_t)
5539iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5540 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
5541
5542{
5543 /*
5544 * Do all the call setup and cleanup.
5545 */
5546 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
5547
5548 /*
5549 * Load the two hidden arguments.
5550 */
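 /* Note: on Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED the strict status code is (presumably)
    returned via a hidden buffer, so its stack address goes into ARG0 and pVCpu/cbInstr shift to
    ARG1/ARG2; the result is then fetched back from that buffer right after the call below. */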
5551#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5552 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5553 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5554 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
5555#else
5556 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5557 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
5558#endif
5559
5560 /*
5561 * Make the call and check the return code.
5562 *
5563 * Shadow PC copies are always flushed here, other stuff depends on flags.
5564 * Segment and general purpose registers are explicitly flushed via the
5565 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
5566 * macros.
5567 */
5568 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
5569#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5570 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5571#endif
5572 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
5573 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
5574 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5575 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5576
5577 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5578}
5579
5580
5581#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5582 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
5583
5584/** Emits code for IEM_MC_CALL_CIMPL_1. */
5585DECL_INLINE_THROW(uint32_t)
5586iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5587 uintptr_t pfnCImpl, uint8_t idxArg0)
5588{
5589 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5590 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5591 RT_NOREF_PV(idxArg0);
5592
5593 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
5594}
5595
5596
5597#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5598 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
5599
5600/** Emits code for IEM_MC_CALL_CIMPL_2. */
5601DECL_INLINE_THROW(uint32_t)
5602iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5603 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
5604{
5605 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5606 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5607 RT_NOREF_PV(idxArg0);
5608
5609 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5610 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5611 RT_NOREF_PV(idxArg1);
5612
5613 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
5614}
5615
5616
5617#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5618 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5619 (uintptr_t)a_pfnCImpl, a0, a1, a2)
5620
5621/** Emits code for IEM_MC_CALL_CIMPL_3. */
5622DECL_INLINE_THROW(uint32_t)
5623iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5624 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
5625{
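 /* Note: the unindented statement below writes an int3 (0xcc) into the instruction buffer, presumably
    a temporary debug marker so execution traps the first time this not-yet-exercised path is hit;
    the CIMPL_4 and CIMPL_5 emitters below do the same. */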
5626pReNative->pInstrBuf[off++] = 0xcc;
5627 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5628 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5629 RT_NOREF_PV(idxArg0);
5630
5631 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5632 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5633 RT_NOREF_PV(idxArg1);
5634
5635 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5636 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5637 RT_NOREF_PV(idxArg2);
5638
5639 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
5640}
5641
5642
5643#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
5644 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5645 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
5646
5647/** Emits code for IEM_MC_CALL_CIMPL_4. */
5648DECL_INLINE_THROW(uint32_t)
5649iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5650 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
5651{
5652pReNative->pInstrBuf[off++] = 0xcc;
5653 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5654 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5655 RT_NOREF_PV(idxArg0);
5656
5657 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5658 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5659 RT_NOREF_PV(idxArg1);
5660
5661 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5662 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5663 RT_NOREF_PV(idxArg2);
5664
5665 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5666 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5667 RT_NOREF_PV(idxArg3);
5668
5669 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
5670}
5671
5672
5673#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
5674 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5675 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
5676
5677/** Emits code for IEM_MC_CALL_CIMPL_5. */
5678DECL_INLINE_THROW(uint32_t)
5679iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5680 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
5681{
5682pReNative->pInstrBuf[off++] = 0xcc;
5683 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5684 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5685 RT_NOREF_PV(idxArg0);
5686
5687 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5688 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5689 RT_NOREF_PV(idxArg1);
5690
5691 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5692 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5693 RT_NOREF_PV(idxArg2);
5694
5695 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5696 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5697 RT_NOREF_PV(idxArg3);
5698
5699 Assert(idxArg4 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg4)));
5700 Assert(pReNative->Core.aVars[idxArg4].uArgNo == 4 + IEM_CIMPL_HIDDEN_ARGS);
5701 RT_NOREF_PV(idxArg4);
5702
5703 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
5704}
5705
5706
5707/** Recompiler debugging: Flush guest register shadow copies. */
5708#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
5709
5710
5711
5712
5713/*********************************************************************************************************************************
5714* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
5715*********************************************************************************************************************************/
5716
5717#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
5718 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
5719
5720/** Emits code for IEM_MC_FETCH_GREG_U16. */
5721DECL_INLINE_THROW(uint32_t)
5722iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
5723{
5724 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
5725 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
5726
5727 /*
5728 * We can either just load the low 16-bit of the GPR into a host register
5729 * for the variable, or we can do so via a shadow copy host register. The
5730 * latter will avoid having to reload it if it's being stored later, but
5731 * will waste a host register if it isn't touched again. Since we don't
5732 * know what's going to happen, we choose the latter for now.
5733 */
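 /* (On AMD64 the load below typically ends up as a single 'movzx r32, r16' from the shadow host
    register into the variable's register; on ARM64 as a 16-bit unsigned bitfield extract.) */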
5734 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5735 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5736 kIemNativeGstRegUse_ReadOnly);
5737
5738 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5739 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
5740 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
5741
5742 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
5743 return off;
5744}
5745
5746
5747
5748/*********************************************************************************************************************************
5749* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
5750*********************************************************************************************************************************/
5751
5752#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
5753 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
5754
5755/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
5756DECL_INLINE_THROW(uint32_t)
5757iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
5758{
5759 Assert(iGRegEx < 20);
5760 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5761 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + (iGRegEx & 15)),
5762 kIemNativeGstRegUse_ForUpdate);
5763#ifdef RT_ARCH_AMD64
5764 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5765
5766 /* To the lowest byte of the register: mov r8, imm8 */
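 /* (A bare REX prefix (0x40) is needed to address spl/bpl/sil/dil, while REX.B selects r8b..r15b.) */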
5767 if (iGRegEx < 16)
5768 {
5769 if (idxGstTmpReg >= 8)
5770 pbCodeBuf[off++] = X86_OP_REX_B;
5771 else if (idxGstTmpReg >= 4)
5772 pbCodeBuf[off++] = X86_OP_REX;
5773 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5774 pbCodeBuf[off++] = u8Value;
5775 }
5776 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
5777 else if (idxGstTmpReg < 4)
5778 {
5779 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
5780 pbCodeBuf[off++] = u8Value;
5781 }
5782 else
5783 {
5784 /* ror reg64, 8 */
5785 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5786 pbCodeBuf[off++] = 0xc1;
5787 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5788 pbCodeBuf[off++] = 8;
5789
5790 /* mov reg8, imm8 */
5791 if (idxGstTmpReg >= 8)
5792 pbCodeBuf[off++] = X86_OP_REX_B;
5793 else if (idxGstTmpReg >= 4)
5794 pbCodeBuf[off++] = X86_OP_REX;
5795 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5796 pbCodeBuf[off++] = u8Value;
5797
5798 /* rol reg64, 8 */
5799 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5800 pbCodeBuf[off++] = 0xc1;
5801 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5802 pbCodeBuf[off++] = 8;
5803 }
5804
5805#elif defined(RT_ARCH_ARM64)
5806 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
5807 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5808 if (iGRegEx < 16)
5809 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
5810 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
5811 else
5812 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
5813 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
5814 iemNativeRegFreeTmp(pReNative, idxImmReg);
5815
5816#else
5817# error "Port me!"
5818#endif
5819
5820 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5821
5822 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
5823
5824 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5825 return off;
5826}
5827
5828
5829#if 0
5830#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
5831 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Value)
5832
5833/** Emits code for IEM_MC_STORE_GREG_U16. */
5834DECL_INLINE_THROW(uint32_t)
5835iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5836{
5837 Assert(iGReg < 16);
5838 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5839 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5840 kIemNativeGstRegUse_ForUpdate);
5841
5842
5843#ifdef RT_ARCH_AMD64
5844 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5845
5846 /* To the lowest byte of the register: mov r8, imm8 */
5847 if (iGRegEx < 16)
5848 {
5849 if (idxGstTmpReg >= 8)
5850 pbCodeBuf[off++] = X86_OP_REX_B;
5851 else if (idxGstTmpReg >= 4)
5852 pbCodeBuf[off++] = X86_OP_REX;
5853 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5854 pbCodeBuf[off++] = u8Value;
5855 }
5856 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
5857 else if (idxGstTmpReg < 4)
5858 {
5859 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
5860 pbCodeBuf[off++] = u8Value;
5861 }
5862 else
5863 {
5864 /* ror reg64, 8 */
5865 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5866 pbCodeBuf[off++] = 0xc1;
5867 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5868 pbCodeBuf[off++] = 8;
5869
5870 /* mov reg8, imm8 */
5871 if (idxGstTmpReg >= 8)
5872 pbCodeBuf[off++] = X86_OP_REX_B;
5873 else if (idxGstTmpReg >= 4)
5874 pbCodeBuf[off++] = X86_OP_REX;
5875 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5876 pbCodeBuf[off++] = u8Value;
5877
5878 /* rol reg64, 8 */
5879 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5880 pbCodeBuf[off++] = 0xc1;
5881 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5882 pbCodeBuf[off++] = 8;
5883 }
5884
5885#elif defined(RT_ARCH_ARM64)
5886 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
5887 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5888 if (iGRegEx < 16)
5889 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
5890 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
5891 else
5892 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
5893 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
5894 iemNativeRegFreeTmp(pReNative, idxImmReg);
5895
5896#else
5897# error "Port me!"
5898#endif
5899
5900 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5901
5902 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
5903
5904 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5905 return off;
5906}
5907#endif
5908
5909
5910
5911/*********************************************************************************************************************************
5912* General purpose register manipulation (add, sub). *
5913*********************************************************************************************************************************/
5914
5915#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5916 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5917
5918/** Emits code for IEM_MC_SUB_GREG_U16. */
5919DECL_INLINE_THROW(uint32_t)
5920iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5921{
5922 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5923 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5924 kIemNativeGstRegUse_ForUpdate);
5925
5926#ifdef RT_ARCH_AMD64
5927 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5928 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5929 if (idxGstTmpReg >= 8)
5930 pbCodeBuf[off++] = X86_OP_REX_B;
5931 if (uSubtrahend == 1)
5932 {
5933 pbCodeBuf[off++] = 0xff; /* dec */
5934 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5935 }
5936 else
5937 {
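 /* sub r/m16, imm16 - the byte-sized uSubtrahend is emitted as a zero-extended 16-bit immediate. */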
5938 pbCodeBuf[off++] = 0x81;
5939 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5940 pbCodeBuf[off++] = uSubtrahend;
5941 pbCodeBuf[off++] = 0;
5942 }
5943
5944#else
5945 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5946 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5947
5948 /* sub tmp, gstgrp, uSubtrahend */
5949 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5950
5951 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5952 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5953
5954 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5955#endif
5956
5957 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5958
5959 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5960
5961 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5962 return off;
5963}
5964
5965
5966#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5967 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5968
5969#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5970 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5971
5972/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5973DECL_INLINE_THROW(uint32_t)
5974iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5975{
5976 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5977 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5978 kIemNativeGstRegUse_ForUpdate);
5979
5980#ifdef RT_ARCH_AMD64
5981 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5982 if (f64Bit)
5983 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5984 else if (idxGstTmpReg >= 8)
5985 pbCodeBuf[off++] = X86_OP_REX_B;
5986 if (uSubtrahend == 1)
5987 {
5988 /* dec */
5989 pbCodeBuf[off++] = 0xff;
5990 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5991 }
5992 else if (uSubtrahend < 128)
5993 {
5994 pbCodeBuf[off++] = 0x83; /* sub */
5995 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5996 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5997 }
5998 else
5999 {
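 /* sub r/m32/64, imm32 - uSubtrahend (at most 255) is emitted as a zero-extended 32-bit immediate. */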
6000 pbCodeBuf[off++] = 0x81; /* sub */
6001 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6002 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6003 pbCodeBuf[off++] = 0;
6004 pbCodeBuf[off++] = 0;
6005 pbCodeBuf[off++] = 0;
6006 }
6007
6008#else
6009 /* sub tmp, gstgrp, uSubtrahend */
6010 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6011 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
6012
6013#endif
6014
6015 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6016
6017 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6018
6019 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6020 return off;
6021}
6022
6023
6024
6025/*********************************************************************************************************************************
6026* Builtin functions *
6027*********************************************************************************************************************************/
6028
6029/**
6030 * Built-in function that calls a C-implementation function taking zero arguments.
6031 */
6032static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
6033{
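 /* Threaded call parameters: [0] = the C-implementation function, [1] = the instruction length,
    [2] = the mask of guest register shadows to flush. */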
6034 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
6035 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
6036 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
6037 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
6038}
6039
6040
6041/**
6042 * Built-in function that checks for pending interrupts that can be delivered,
6043 * or for forced action flags that need servicing.
6044 *
6045 * This triggers after the completion of an instruction, so EIP is already at
6046 * the next instruction. If an IRQ or important FF is pending, this will return
6047 * a non-zero status that stops TB execution.
6048 */
6049static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
6050{
6051 RT_NOREF(pCallEntry);
6052
6053 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
6054 and I'm too lazy to create a 'Fixed' version of that one. */
6055 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
6056 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
6057
6058 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
6059
6060 /* Again, we need to load the extended EFLAGS before we actually need them
6061 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
6062 loaded them inside the check, as the shadow state would not be correct
6063 when the code branches before the load. Ditto PC. */
6064 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6065 kIemNativeGstRegUse_ReadOnly);
6066
6067 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
6068
6069 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6070
6071 /*
6072 * Start by checking the local forced actions of the EMT we're on for IRQs
6073 * and other FFs that needs servicing.
6074 */
6075 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
6076 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
6077 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
6078 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
6079 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
6080 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
6081 | VMCPU_FF_TLB_FLUSH
6082 | VMCPU_FF_UNHALT ),
6083 true /*fSetFlags*/);
6084 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
6085 uint32_t const offFixupJumpToVmCheck1 = off;
6086 off = iemNativeEmitJzToFixed(pReNative, off, 0);
6087
6088 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
6089 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
6090 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
6091 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
6092 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
6093 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6094
6095 /* So, it's only interrupt-related FFs and we need to see if IRQs are being
6096 suppressed by the CPU or not. */
6097 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
6098 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
6099 idxLabelReturnBreak);
6100
6101 /* We've got shadow flags set, so we must check that the PC they are valid
6102 for matches our current PC value. */
6103 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
6104 * a register. */
6105 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
6106 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
6107
6108 /*
6109 * Now check the force flags of the VM.
6110 */
6111 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
6112 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
6113 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
6114 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
6115 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
6116 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6117
6118 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
6119
6120 /*
6121 * We're good, no IRQs or FFs pending.
6122 */
6123 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6124 iemNativeRegFreeTmp(pReNative, idxEflReg);
6125 iemNativeRegFreeTmp(pReNative, idxPcReg);
6126
6127 return off;
6128}
6129
6130
6131/**
6132 * Built-in function that checks if IEMCPU::fExec has the expected value.
6133 */
6134static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
6135{
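 /* Compares the key bits of IEMCPU::fExec against the mode the TB was compiled for (auParams[0])
    and breaks out of the TB on a mismatch. */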
6136 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
6137 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6138
6139 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6140 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
6141 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
6142 kIemNativeLabelType_ReturnBreak);
6143 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6144 return off;
6145}
6146
6147
6148
6149/*********************************************************************************************************************************
6150* The native code generator functions for each MC block. *
6151*********************************************************************************************************************************/
6152
6153
6154/*
6155 * Include g_apfnIemNativeRecompileFunctions and associated functions.
6156 *
6157 * This should probably live in its own file later, but let's see what the
6158 * compile times turn out to be first.
6159 */
6160#include "IEMNativeFunctions.cpp.h"
6161
6162
6163
6164/*********************************************************************************************************************************
6165* Recompiler Core. *
6166*********************************************************************************************************************************/
6167
6168
6169/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
6170static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
6171{
6172 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
6173 pDis->cbCachedInstr += cbMaxRead;
6174 RT_NOREF(cbMinRead);
6175 return VERR_NO_DATA;
6176}
6177
6178
6179/**
6180 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
6181 * @returns pszBuf.
6182 * @param fFlags The flags.
6183 * @param pszBuf The output buffer.
6184 * @param cbBuf The output buffer size. At least 32 bytes.
6185 */
6186DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
6187{
6188 Assert(cbBuf >= 32);
6189 static RTSTRTUPLE const s_aModes[] =
6190 {
6191 /* [00] = */ { RT_STR_TUPLE("16BIT") },
6192 /* [01] = */ { RT_STR_TUPLE("32BIT") },
6193 /* [02] = */ { RT_STR_TUPLE("!2!") },
6194 /* [03] = */ { RT_STR_TUPLE("!3!") },
6195 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
6196 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
6197 /* [06] = */ { RT_STR_TUPLE("!6!") },
6198 /* [07] = */ { RT_STR_TUPLE("!7!") },
6199 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
6200 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
6201 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
6202 /* [0b] = */ { RT_STR_TUPLE("!b!") },
6203 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
6204 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
6205 /* [0e] = */ { RT_STR_TUPLE("!e!") },
6206 /* [0f] = */ { RT_STR_TUPLE("!f!") },
6207 /* [10] = */ { RT_STR_TUPLE("!10!") },
6208 /* [11] = */ { RT_STR_TUPLE("!11!") },
6209 /* [12] = */ { RT_STR_TUPLE("!12!") },
6210 /* [13] = */ { RT_STR_TUPLE("!13!") },
6211 /* [14] = */ { RT_STR_TUPLE("!14!") },
6212 /* [15] = */ { RT_STR_TUPLE("!15!") },
6213 /* [16] = */ { RT_STR_TUPLE("!16!") },
6214 /* [17] = */ { RT_STR_TUPLE("!17!") },
6215 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
6216 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
6217 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
6218 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
6219 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
6220 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
6221 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
6222 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
6223 };
6224 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
6225 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
6226 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
6227
6228 pszBuf[off++] = ' ';
6229 pszBuf[off++] = 'C';
6230 pszBuf[off++] = 'P';
6231 pszBuf[off++] = 'L';
6232 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
6233 Assert(off < 32);
6234
6235 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
6236
6237 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
6238 {
6239 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
6240 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
6241 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
6242 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
6243 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
6244 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
6245 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
6246 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
6247 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
6248 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
6249 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
6250 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
6251 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
6252 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
6253 };
6254 if (fFlags)
6255 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
6256 if (s_aFlags[i].fFlag & fFlags)
6257 {
6258 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
6259 pszBuf[off++] = ' ';
6260 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
6261 off += s_aFlags[i].cchName;
6262 fFlags &= ~s_aFlags[i].fFlag;
6263 if (!fFlags)
6264 break;
6265 }
6266 pszBuf[off] = '\0';
6267
6268 return pszBuf;
6269}
6270
6271
6272DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
6273{
6274 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
6275
6276 char szDisBuf[512];
6277 DISSTATE Dis;
6278 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
6279 uint32_t const cNative = pTb->Native.cInstructions;
6280 uint32_t offNative = 0;
6281#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6282 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
6283#endif
6284 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6285 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6286 : DISCPUMODE_64BIT;
6287#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6288 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
6289#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6290 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
6291#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6292# error "Port me"
6293#else
6294 csh hDisasm = ~(size_t)0;
6295# if defined(RT_ARCH_AMD64)
6296 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
6297# elif defined(RT_ARCH_ARM64)
6298 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
6299# else
6300# error "Port me"
6301# endif
6302 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
6303#endif
6304
6305 /*
6306 * Print TB info.
6307 */
6308 pHlp->pfnPrintf(pHlp,
6309 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
6310 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
6311 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
6312 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
6313#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6314 if (pDbgInfo && pDbgInfo->cEntries > 1)
6315 {
6316 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
6317
6318 /*
6319 * This disassembly is driven by the debug info which follows the native
6320 * code and indicates where the next guest instruction starts, where
6321 * labels are, and other such things.
6322 */
6323 uint32_t idxThreadedCall = 0;
6324 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
6325 uint8_t idxRange = UINT8_MAX;
6326 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
6327 uint32_t offRange = 0;
6328 uint32_t offOpcodes = 0;
6329 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
6330 uint32_t const cDbgEntries = pDbgInfo->cEntries;
6331 uint32_t iDbgEntry = 1;
6332 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
6333
6334 while (offNative < cNative)
6335 {
6336 /* If we're at or have passed the point where the next chunk of debug
6337 info starts, process it. */
6338 if (offDbgNativeNext <= offNative)
6339 {
6340 offDbgNativeNext = UINT32_MAX;
6341 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
6342 {
6343 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
6344 {
6345 case kIemTbDbgEntryType_GuestInstruction:
6346 {
6347 /* Did the exec flag change? */
6348 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
6349 {
6350 pHlp->pfnPrintf(pHlp,
6351 " fExec change %#08x -> %#08x %s\n",
6352 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6353 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6354 szDisBuf, sizeof(szDisBuf)));
6355 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
6356 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6357 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6358 : DISCPUMODE_64BIT;
6359 }
6360
6361 /* New opcode range? We need to fend off a spurious debug info entry here for cases
6362 where the compilation was aborted before the opcode was recorded and the actual
6363 instruction was translated to a threaded call. This may happen when we run out
6364 of ranges, or when some complicated interrupts/FFs are found to be pending or
6365 similar. So, we just deal with it here rather than in the compiler code as it
6366 is a lot simpler to do up here. */
6367 if ( idxRange == UINT8_MAX
6368 || idxRange >= cRanges
6369 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
6370 {
6371 idxRange += 1;
6372 if (idxRange < cRanges)
6373 offRange = 0;
6374 else
6375 continue;
6376 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
6377 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
6378 + (pTb->aRanges[idxRange].idxPhysPage == 0
6379 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6380 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
6381 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6382 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
6383 pTb->aRanges[idxRange].idxPhysPage);
6384 }
6385
6386 /* Disassemble the instruction. */
6387 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
6388 uint32_t cbInstr = 1;
6389 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6390 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
6391 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6392 if (RT_SUCCESS(rc))
6393 {
6394 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6395 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6396 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6397 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6398
6399 static unsigned const s_offMarker = 55;
6400 static char const s_szMarker[] = " ; <--- guest";
6401 if (cch < s_offMarker)
6402 {
6403 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
6404 cch = s_offMarker;
6405 }
6406 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
6407 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
6408
6409 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
6410 }
6411 else
6412 {
6413 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
6414 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
6415 cbInstr = 1;
6416 }
6417 GCPhysPc += cbInstr;
6418 offOpcodes += cbInstr;
6419 offRange += cbInstr;
6420 continue;
6421 }
6422
6423 case kIemTbDbgEntryType_ThreadedCall:
6424 pHlp->pfnPrintf(pHlp,
6425 " Call #%u to %s (%u args)%s\n",
6426 idxThreadedCall,
6427 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6428 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6429 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
6430 idxThreadedCall++;
6431 continue;
6432
6433 case kIemTbDbgEntryType_GuestRegShadowing:
6434 {
6435 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
6436 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
6437 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
6438 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
6439 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6440 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
6441 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
6442 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
6443 else
6444 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
6445 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
6446 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6447 continue;
6448 }
6449
6450 case kIemTbDbgEntryType_Label:
6451 {
6452 const char *pszName = "what_the_fudge";
6453 const char *pszComment = "";
6454 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
6455 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
6456 {
6457 case kIemNativeLabelType_Return:
6458 pszName = "Return";
6459 break;
6460 case kIemNativeLabelType_ReturnBreak:
6461 pszName = "ReturnBreak";
6462 break;
6463 case kIemNativeLabelType_ReturnWithFlags:
6464 pszName = "ReturnWithFlags";
6465 break;
6466 case kIemNativeLabelType_NonZeroRetOrPassUp:
6467 pszName = "NonZeroRetOrPassUp";
6468 break;
6469 case kIemNativeLabelType_RaiseGp0:
6470 pszName = "RaiseGp0";
6471 break;
6472 case kIemNativeLabelType_If:
6473 pszName = "If";
6474 fNumbered = true;
6475 break;
6476 case kIemNativeLabelType_Else:
6477 pszName = "Else";
6478 fNumbered = true;
6479 pszComment = " ; regs state restored pre-if-block";
6480 break;
6481 case kIemNativeLabelType_Endif:
6482 pszName = "Endif";
6483 fNumbered = true;
6484 break;
6485 case kIemNativeLabelType_CheckIrq:
6486 pszName = "CheckIrq_CheckVM";
6487 fNumbered = true;
6488 break;
6489 case kIemNativeLabelType_Invalid:
6490 case kIemNativeLabelType_End:
6491 break;
6492 }
6493 if (fNumbered)
6494 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
6495 else
6496 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
6497 continue;
6498 }
6499
6500 case kIemTbDbgEntryType_NativeOffset:
6501 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
6502 Assert(offDbgNativeNext > offNative);
6503 break;
6504
6505 default:
6506 AssertFailed();
6507 }
6508 iDbgEntry++;
6509 break;
6510 }
6511 }
6512
6513 /*
6514 * Disassemble the next native instruction.
6515 */
6516 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6517# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6518 uint32_t cbInstr = sizeof(paNative[0]);
6519 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6520 if (RT_SUCCESS(rc))
6521 {
6522# if defined(RT_ARCH_AMD64)
6523 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6524 {
6525 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6526 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6527 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
6528 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6529 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6530 uInfo & 0x8000 ? " - recompiled" : "");
6531 else
6532 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6533 }
6534 else
6535# endif
6536 {
6537# ifdef RT_ARCH_AMD64
6538 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6539 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6540 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6541 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6542# elif defined(RT_ARCH_ARM64)
6543 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6544 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6545 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6546# else
6547# error "Port me"
6548# endif
6549 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6550 }
6551 }
6552 else
6553 {
6554# if defined(RT_ARCH_AMD64)
6555 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6556 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6557# elif defined(RT_ARCH_ARM64)
6558 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6559# else
6560# error "Port me"
6561# endif
6562 cbInstr = sizeof(paNative[0]);
6563 }
6564 offNative += cbInstr / sizeof(paNative[0]);
6565
6566# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6567 cs_insn *pInstr;
6568 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6569 (uintptr_t)pNativeCur, 1, &pInstr);
6570 if (cInstrs > 0)
6571 {
6572 Assert(cInstrs == 1);
6573# if defined(RT_ARCH_AMD64)
6574 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6575 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6576# else
6577 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6578 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6579# endif
6580 offNative += pInstr->size / sizeof(*pNativeCur);
6581 cs_free(pInstr, cInstrs);
6582 }
6583 else
6584 {
6585# if defined(RT_ARCH_AMD64)
6586 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6587 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6588# else
6589 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6590# endif
6591 offNative++;
6592 }
6593# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6594 }
6595 }
6596 else
6597#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
6598 {
6599 /*
6600 * No debug info, just disassemble the x86 code and then the native code.
6601 *
6602 * First the guest code:
6603 */
6604 for (unsigned i = 0; i < pTb->cRanges; i++)
6605 {
6606 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
6607 + (pTb->aRanges[i].idxPhysPage == 0
6608 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6609 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
6610 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6611 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
6612 unsigned off = pTb->aRanges[i].offOpcodes;
6613 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
6614 while (off < cbOpcodes)
6615 {
6616 uint32_t cbInstr = 1;
6617 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6618 &pTb->pabOpcodes[off], cbOpcodes - off,
6619 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6620 if (RT_SUCCESS(rc))
6621 {
6622 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6623 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6624 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6625 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6626 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
6627 GCPhysPc += cbInstr;
6628 off += cbInstr;
6629 }
6630 else
6631 {
6632 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
6633 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
6634 break;
6635 }
6636 }
6637 }
6638
6639 /*
6640 * Then the native code:
6641 */
6642 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
6643 while (offNative < cNative)
6644 {
6645 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6646# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6647 uint32_t cbInstr = sizeof(paNative[0]);
6648 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6649 if (RT_SUCCESS(rc))
6650 {
6651# if defined(RT_ARCH_AMD64)
6652 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6653 {
6654 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6655 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6656 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
6657 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6658 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6659 uInfo & 0x8000 ? " - recompiled" : "");
6660 else
6661 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6662 }
6663 else
6664# endif
6665 {
6666# ifdef RT_ARCH_AMD64
6667 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6668 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6669 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6670 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6671# elif defined(RT_ARCH_ARM64)
6672 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6673 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6674 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6675# else
6676# error "Port me"
6677# endif
6678 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6679 }
6680 }
6681 else
6682 {
6683# if defined(RT_ARCH_AMD64)
6684 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6685 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6686# else
6687 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6688# endif
6689 cbInstr = sizeof(paNative[0]);
6690 }
6691 offNative += cbInstr / sizeof(paNative[0]);
6692
6693# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6694 cs_insn *pInstr;
6695 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6696 (uintptr_t)pNativeCur, 1, &pInstr);
6697 if (cInstrs > 0)
6698 {
6699 Assert(cInstrs == 1);
6700# if defined(RT_ARCH_AMD64)
6701 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6702 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6703# else
6704 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6705 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6706# endif
6707 offNative += pInstr->size / sizeof(*pNativeCur);
6708 cs_free(pInstr, cInstrs);
6709 }
6710 else
6711 {
6712# if defined(RT_ARCH_AMD64)
6713 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6714 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6715# else
6716 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6717# endif
6718 offNative++;
6719 }
6720# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6721 }
6722 }
6723
6724#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6725 /* Cleanup. */
6726 cs_close(&hDisasm);
6727#endif
6728}
6729
6730
6731/**
6732 * Recompiles the given threaded TB into a native one.
6733 *
6734 * In case of failure the translation block will be returned as-is.
6735 *
6736 * @returns pTb.
6737 * @param pVCpu The cross context virtual CPU structure of the calling
6738 * thread.
6739 * @param pTb The threaded translation block to recompile to native.
6740 */
6741DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
6742{
6743 /*
6744 * The first time thru, we allocate the recompiler state, the other times
6745 * we just need to reset it before using it again.
6746 */
6747 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
6748 if (RT_LIKELY(pReNative))
6749 iemNativeReInit(pReNative, pTb);
6750 else
6751 {
6752 pReNative = iemNativeInit(pVCpu, pTb);
6753 AssertReturn(pReNative, pTb);
6754 }
6755
6756 /*
6757 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
6758 * for aborting if an error happens.
6759 */
6760 uint32_t cCallsLeft = pTb->Thrd.cCalls;
6761#ifdef LOG_ENABLED
6762 uint32_t const cCallsOrg = cCallsLeft;
6763#endif
6764 uint32_t off = 0;
6765 int rc = VINF_SUCCESS;
6766 IEMNATIVE_TRY_SETJMP(pReNative, rc)
6767 {
6768 /*
6769 * Emit prolog code (fixed).
6770 */
6771 off = iemNativeEmitProlog(pReNative, off);
6772
6773 /*
6774 * Convert the calls to native code.
6775 */
6776#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6777 int32_t iGstInstr = -1;
6778 uint32_t fExec = pTb->fFlags;
6779#endif
6780 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
6781 while (cCallsLeft-- > 0)
6782 {
6783 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
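 /* A NULL entry means there is no native emitter for this threaded function yet; such calls fall
    back to emitting a call to the threaded function itself (see below). */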
6784
6785 /*
6786 * Debug info and assembly markup.
6787 */
6788#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6789 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
6790 fExec = pCallEntry->auParams[0];
6791 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6792 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
6793 {
6794 if (iGstInstr < (int32_t)pTb->cInstructions)
6795 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
6796 else
6797 Assert(iGstInstr == pTb->cInstructions);
6798 iGstInstr = pCallEntry->idxInstr;
6799 }
6800 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
6801#endif
6802#if defined(VBOX_STRICT) && 1
6803 off = iemNativeEmitMarker(pReNative, off,
6804 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
6805 pCallEntry->enmFunction));
6806#endif
6807
6808 /*
6809 * Actual work.
6810 */
6811 if (pfnRecom) /** @todo stats on this. */
6812 {
6813 //STAM_COUNTER_INC()
6814 off = pfnRecom(pReNative, off, pCallEntry);
6815 }
6816 else
6817 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
6818 Assert(off <= pReNative->cInstrBufAlloc);
6819 Assert(pReNative->cCondDepth == 0);
6820
6821 /*
6822 * Advance.
6823 */
6824 pCallEntry++;
6825 }
6826
6827 /*
6828 * Emit the epilog code.
6829 */
6830 uint32_t idxReturnLabel;
6831 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
6832
6833 /*
6834 * Generate special jump labels.
6835 */
6836 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
6837 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
6838 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
6839 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
6840 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
6841 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
6842 }
6843 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
6844 {
6845 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
6846 return pTb;
6847 }
6848 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
6849 Assert(off <= pReNative->cInstrBufAlloc);
6850
6851 /*
6852 * Make sure all labels have been defined.
6853 */
6854 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
6855#ifdef VBOX_STRICT
6856 uint32_t const cLabels = pReNative->cLabels;
6857 for (uint32_t i = 0; i < cLabels; i++)
6858 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
6859#endif
6860
6861 /*
6862 * Allocate executable memory, copy over the code we've generated.
6863 */
6864 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
6865 if (pTbAllocator->pDelayedFreeHead)
6866 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
6867
6868 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
6869 AssertReturn(paFinalInstrBuf, pTb);
6870 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
6871
6872 /*
6873 * Apply fixups.
6874 */
6875 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
6876 uint32_t const cFixups = pReNative->cFixups;
6877 for (uint32_t i = 0; i < cFixups; i++)
6878 {
6879 Assert(paFixups[i].off < off);
6880 Assert(paFixups[i].idxLabel < cLabels);
6881 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
6882 switch (paFixups[i].enmType)
6883 {
6884#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6885 case kIemNativeFixupType_Rel32:
6886 Assert(paFixups[i].off + 4 <= off);
6887 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6888 continue;
6889
6890#elif defined(RT_ARCH_ARM64)
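 /* The displacements below are in 32-bit instruction units. In the A64 encoding, a 26-bit immediate
    at bit 0 is used by B/BL, a 19-bit immediate at bit 5 by B.cond/CBZ/CBNZ, and a 14-bit immediate
    at bit 5 by TBZ/TBNZ. */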
6891 case kIemNativeFixupType_RelImm26At0:
6892 {
6893 Assert(paFixups[i].off < off);
6894 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6895 Assert(offDisp >= -262144 && offDisp < 262144);
6896 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6897 continue;
6898 }
6899
6900 case kIemNativeFixupType_RelImm19At5:
6901 {
6902 Assert(paFixups[i].off < off);
6903 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6904 Assert(offDisp >= -262144 && offDisp < 262144);
6905 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6906 continue;
6907 }
6908
6909 case kIemNativeFixupType_RelImm14At5:
6910 {
6911 Assert(paFixups[i].off < off);
6912 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6913 Assert(offDisp >= -8192 && offDisp < 8192);
6914 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
6915 continue;
6916 }
6917
6918#endif
6919 case kIemNativeFixupType_Invalid:
6920 case kIemNativeFixupType_End:
6921 break;
6922 }
6923 AssertFailed();
6924 }
6925
6926 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
6927
6928 /*
6929 * Convert the translation block.
6930 */
6931 //RT_BREAKPOINT();
6932 RTMemFree(pTb->Thrd.paCalls);
6933 pTb->Native.paInstructions = paFinalInstrBuf;
6934 pTb->Native.cInstructions = off;
6935 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
6936#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6937 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
6938 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
6939#endif
6940
6941 Assert(pTbAllocator->cThreadedTbs > 0);
6942 pTbAllocator->cThreadedTbs -= 1;
6943 pTbAllocator->cNativeTbs += 1;
6944 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
6945
6946#ifdef LOG_ENABLED
6947 /*
6948 * Disassemble to the log if enabled.
6949 */
6950 if (LogIs3Enabled())
6951 {
6952 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
6953 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
6954 }
6955#endif
6956
6957 return pTb;
6958}
6959