VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@102011

Last change on this file since 102011 was 102011, checked in by vboxsync, 13 months ago

VMM/IEM: Added a flush mask for guest register shadows to the IEM_MC_CALL_CIMPL_X macros to better manage register optimizations when recompiling to native code, replacing the IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG/SREG macros added earlier today. Added a IEM_MC_HINT_FLUSH_GUEST_SHADOW macro for debugging purposes. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 297.1 KB
1/* $Id: IEMAllN8veRecompiler.cpp 102011 2023-11-08 22:10:48Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMNativeFunctions.h"
102
103
104/*
105 * Narrow down configs here to avoid wasting time on unused configs.
106 * Note! Same checks in IEMAllThrdRecompiler.cpp.
107 */
108
109#ifndef IEM_WITH_CODE_TLB
110# error The code TLB must be enabled for the recompiler.
111#endif
112
113#ifndef IEM_WITH_DATA_TLB
114# error The data TLB must be enabled for the recompiler.
115#endif
116
117#ifndef IEM_WITH_SETJMP
118# error The setjmp approach must be enabled for the recompiler.
119#endif
120
121/** @todo eliminate this clang build hack. */
122#if RT_CLANG_PREREQ(4, 0)
123# pragma GCC diagnostic ignored "-Wunused-function"
124#endif
125
126
127
128/*********************************************************************************************************************************
129* Defined Constants And Macros *
130*********************************************************************************************************************************/
131/** Always count instructions for now. */
132#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
133
134
135/*********************************************************************************************************************************
136* Internal Functions *
137*********************************************************************************************************************************/
138#ifdef VBOX_STRICT
139static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
140 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
141#endif
142#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
143static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
144static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
145#endif
146
147
148/*********************************************************************************************************************************
149* Executable Memory Allocator *
150*********************************************************************************************************************************/
151/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
152 * Use an alternative chunk sub-allocator that does not store internal data
153 * in the chunk.
154 *
155 * Using the RTHeapSimple is not practical on newer darwin systems where
156 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
157 * memory. We would have to change the protection of the whole chunk for
158 * every call to RTHeapSimple, which would be rather expensive.
159 *
160 * This alternative implementation lets us restrict page protection modifications
161 * to the pages backing the executable memory we just allocated.
162 */
163#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
164/** The chunk sub-allocation unit size in bytes. */
165#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
166/** The chunk sub-allocation unit size as a shift factor. */
167#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
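/* Editor's illustrative sketch (not part of the allocator, assumes only the unit
   size macros above): how a request size maps to sub-allocation units.  E.g. a
   300 byte request rounds up to 3 units of 128 bytes, i.e. 384 bytes. */
#if 0
static uint32_t iemExecMemExampleReqUnits(uint32_t cbReq)
{
    /* Same rounding iemExecMemAllocatorAllocInChunk performs further down. */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
AssertCompile(((300 + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) == 3);
#endif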
168
169#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
170# ifdef IEMNATIVE_USE_GDB_JIT
171# define IEMNATIVE_USE_GDB_JIT_ET_DYN
172
173/** GDB JIT: Code entry. */
174typedef struct GDBJITCODEENTRY
175{
176 struct GDBJITCODEENTRY *pNext;
177 struct GDBJITCODEENTRY *pPrev;
178 uint8_t *pbSymFile;
179 uint64_t cbSymFile;
180} GDBJITCODEENTRY;
181
182/** GDB JIT: Actions. */
183typedef enum GDBJITACTIONS : uint32_t
184{
185 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
186} GDBJITACTIONS;
187
188/** GDB JIT: Descriptor. */
189typedef struct GDBJITDESCRIPTOR
190{
191 uint32_t uVersion;
192 GDBJITACTIONS enmAction;
193 GDBJITCODEENTRY *pRelevant;
194 GDBJITCODEENTRY *pHead;
195 /** Our addition: */
196 GDBJITCODEENTRY *pTail;
197} GDBJITDESCRIPTOR;
198
199/** GDB JIT: Our simple symbol file data. */
200typedef struct GDBJITSYMFILE
201{
202 Elf64_Ehdr EHdr;
203# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
204 Elf64_Shdr aShdrs[5];
205# else
206 Elf64_Shdr aShdrs[7];
207 Elf64_Phdr aPhdrs[2];
208# endif
209 /** The dwarf ehframe data for the chunk. */
210 uint8_t abEhFrame[512];
211 char szzStrTab[128];
212 Elf64_Sym aSymbols[3];
213# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
214 Elf64_Sym aDynSyms[2];
215 Elf64_Dyn aDyn[6];
216# endif
217} GDBJITSYMFILE;
218
219extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
220extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
221
222/** Init once for g_IemNativeGdbJitLock. */
223static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
224/** Init once for the critical section. */
225static RTCRITSECT g_IemNativeGdbJitLock;
226
227/** GDB reads the info here. */
228GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
229
230/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
231DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
232{
233 ASMNopPause();
234}
235
236/** @callback_method_impl{FNRTONCE} */
237static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
238{
239 RT_NOREF(pvUser);
240 return RTCritSectInit(&g_IemNativeGdbJitLock);
241}
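/* Editor's note, an illustrative sketch only: the GDB JIT interface protocol as
   used further down in iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk --
   link a GDBJITCODEENTRY into __jit_debug_descriptor, set the action, then call
   __jit_debug_register_code() on which GDB has placed a breakpoint.  The pEntry
   parameter is a hypothetical, already filled-in entry. */
#if 0
static void iemNativeGdbJitExampleRegister(GDBJITCODEENTRY *pEntry)
{
    pEntry->pNext = NULL;
    pEntry->pPrev = __jit_debug_descriptor.pTail;
    if (__jit_debug_descriptor.pTail)
        __jit_debug_descriptor.pTail->pNext = pEntry;
    else
        __jit_debug_descriptor.pHead = pEntry;
    __jit_debug_descriptor.pTail     = pEntry;
    __jit_debug_descriptor.pRelevant = pEntry;

    __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    __jit_debug_register_code();                      /* GDB inspects the descriptor here. */
    __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
}
#endif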
242
243
244# endif /* IEMNATIVE_USE_GDB_JIT */
245
246/**
247 * Per-chunk unwind info for non-windows hosts.
248 */
249typedef struct IEMEXECMEMCHUNKEHFRAME
250{
251# ifdef IEMNATIVE_USE_LIBUNWIND
253 /** The offset of the FDE into abEhFrame. */
253 uintptr_t offFda;
254# else
255 /** 'struct object' storage area. */
256 uint8_t abObject[1024];
257# endif
258# ifdef IEMNATIVE_USE_GDB_JIT
259# if 0
260 /** The GDB JIT 'symbol file' data. */
261 GDBJITSYMFILE GdbJitSymFile;
262# endif
263 /** The GDB JIT list entry. */
264 GDBJITCODEENTRY GdbJitEntry;
265# endif
266 /** The dwarf ehframe data for the chunk. */
267 uint8_t abEhFrame[512];
268} IEMEXECMEMCHUNKEHFRAME;
269/** Pointer to per-chunk unwind info for non-windows hosts. */
270typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
271#endif
272
273
274/**
275 * A chunk of executable memory.
276 */
277typedef struct IEMEXECMEMCHUNK
278{
279#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
280 /** Number of free items in this chunk. */
281 uint32_t cFreeUnits;
283 /** Hint where to start searching for free space in the allocation bitmap. */
283 uint32_t idxFreeHint;
284#else
285 /** The heap handle. */
286 RTHEAPSIMPLE hHeap;
287#endif
288 /** Pointer to the chunk. */
289 void *pvChunk;
290#ifdef IN_RING3
291 /**
292 * Pointer to the unwind information.
293 *
294 * This is used during C++ throw and longjmp (windows and probably most other
295 * platforms). Some debuggers (windbg) make use of it as well.
296 *
297 * Windows: This is allocated from hHeap on windows because (at least for
298 * AMD64) the UNWIND_INFO structure address in the
299 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
300 *
301 * Others: Allocated from the regular heap to avoid unnecessary executable data
302 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
303 void *pvUnwindInfo;
304#elif defined(IN_RING0)
305 /** Allocation handle. */
306 RTR0MEMOBJ hMemObj;
307#endif
308} IEMEXECMEMCHUNK;
309/** Pointer to a memory chunk. */
310typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
311
312
313/**
314 * Executable memory allocator for the native recompiler.
315 */
316typedef struct IEMEXECMEMALLOCATOR
317{
318 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
319 uint32_t uMagic;
320
321 /** The chunk size. */
322 uint32_t cbChunk;
323 /** The maximum number of chunks. */
324 uint32_t cMaxChunks;
325 /** The current number of chunks. */
326 uint32_t cChunks;
327 /** Hint where to start looking for available memory. */
328 uint32_t idxChunkHint;
329 /** Statistics: Current number of allocations. */
330 uint32_t cAllocations;
331
332 /** The total amount of memory available. */
333 uint64_t cbTotal;
334 /** Total amount of free memory. */
335 uint64_t cbFree;
336 /** Total amount of memory allocated. */
337 uint64_t cbAllocated;
338
339#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
340 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
341 *
342 * Since the chunk size is a power of two and the minimum chunk size is a lot
343 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
344 * require a whole number of uint64_t elements in the allocation bitmap. So,
345 * for the sake of simplicity/laziness, they are allocated as one continuous
346 * chunk. */
347 uint64_t *pbmAlloc;
348 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
349 uint32_t cUnitsPerChunk;
350 /** Number of bitmap elements per chunk (for quickly locating the bitmap
351 * portion corresponding to a chunk). */
352 uint32_t cBitmapElementsPerChunk;
353#else
354 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
355 * @{ */
356 /** The size of the heap internal block header. This is used to adjust the
357 * requested memory size to make sure there is exactly enough room for a header at
358 * the end of the blocks we allocate before the next 64 byte alignment line. */
359 uint32_t cbHeapBlockHdr;
360 /** The size of the initial heap allocation required to make sure the first
361 * allocation is correctly aligned. */
362 uint32_t cbHeapAlignTweak;
363 /** The alignment tweak allocation address. */
364 void *pvAlignTweak;
365 /** @} */
366#endif
367
368#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
369 /** Pointer to the array of unwind info running parallel to aChunks (same
370 * allocation as this structure, located after the bitmaps).
371 * (For Windows, the structures must reside in 32-bit RVA distance to the
372 * actual chunk, so they are allocated off the chunk.) */
373 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
374#endif
375
376 /** The allocation chunks. */
377 RT_FLEXIBLE_ARRAY_EXTENSION
378 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
379} IEMEXECMEMALLOCATOR;
380/** Pointer to an executable memory allocator. */
381typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
382
383/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
384#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
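/* Editor's note (illustrative only): for the alternative sub-allocator the single
   RTMemAllocZ block set up in iemExecMemAllocatorInit below is laid out as
       [IEMEXECMEMALLOCATOR incl. aChunks[cMaxChunks]]
       [allocation bitmaps: cBitmapElementsPerChunk * 8 bytes per chunk]
       [IEMEXECMEMCHUNKEHFRAME array: ring-3 non-Windows builds only]
   with pbmAlloc and paEhFrames pointing into the two tail parts. */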
385
386
387static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
388
389
390/**
391 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
392 * the heap statistics.
393 */
394static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
395 uint32_t cbReq, uint32_t idxChunk)
396{
397 pExecMemAllocator->cAllocations += 1;
398 pExecMemAllocator->cbAllocated += cbReq;
399#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
400 pExecMemAllocator->cbFree -= cbReq;
401#else
402 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
403#endif
404 pExecMemAllocator->idxChunkHint = idxChunk;
405
406#ifdef RT_OS_DARWIN
407 /*
408 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
409 * on darwin. So, we mark the pages returned as read+write after alloc and
410 * expect the caller to call iemExecMemAllocatorReadyForUse when done
411 * writing to the allocation.
412 *
413 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
414 * for details.
415 */
416 /** @todo detect if this is necessary... it wasn't required on 10.15 or
417 * whatever older version it was. */
418 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
419 AssertRC(rc);
420#endif
421
422 return pvRet;
423}
424
425
426#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
427static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
428 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
429{
430 /*
431 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
432 */
433 Assert(!(cToScan & 63));
434 Assert(!(idxFirst & 63));
435 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
436 pbmAlloc += idxFirst / 64;
437
438 /*
439 * Scan the bitmap for cReqUnits consecutive clear bits
440 */
441 /** @todo This can probably be done more efficiently for non-x86 systems. */
442 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
443 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
444 {
445 uint32_t idxAddBit = 1;
446 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
447 idxAddBit++;
448 if (idxAddBit >= cReqUnits)
449 {
450 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
451
452 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
453 pChunk->cFreeUnits -= cReqUnits;
454 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
455
456 void * const pvRet = (uint8_t *)pChunk->pvChunk
457 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
458
459 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
460 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
461 }
462
463 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
464 }
465 return NULL;
466}
467#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
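/* Editor's worked example (illustrative only) for the scan above: with
   cReqUnits = 3 and the first bitmap word reading 0b...11100011, ASMBitFirstClear
   finds bit 2, the inner loop sees that bits 3 and 4 are clear as well, so bits
   2..4 get set, cFreeUnits drops by 3, and the returned address is
   pvChunk + ((idxFirst + 2) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */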
468
469
470static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
471{
472#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
473 /*
474 * Figure out how much to allocate.
475 */
476 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
477 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
478 {
479 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
480 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
481 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
482 {
483 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
484 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
485 if (pvRet)
486 return pvRet;
487 }
488 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
489 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
490 cReqUnits, idxChunk);
491 }
492#else
493 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
494 if (pvRet)
495 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
496#endif
497 return NULL;
498
499}
500
501
502/**
503 * Allocates @a cbReq bytes of executable memory.
504 *
505 * @returns Pointer to the memory, NULL if out of memory or other problem
506 * encountered.
507 * @param pVCpu The cross context virtual CPU structure of the calling
508 * thread.
509 * @param cbReq How many bytes are required.
510 */
511static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
512{
513 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
514 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
515 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
516
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means to follow the logic described
521 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * While for the alternative one we just align it up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /* What now? Prune native translation blocks from the cache? */
565 AssertFailed();
566 return NULL;
567}
568
569
570/** This is a hook that we may need later for changing memory protection back
571 * to readonly+exec */
572static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
573{
574#ifdef RT_OS_DARWIN
575 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
576 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
577 AssertRC(rc); RT_NOREF(pVCpu);
578
579 /*
580 * Flush the instruction cache:
581 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
582 */
583 /* sys_dcache_flush(pv, cb); - not necessary */
584 sys_icache_invalidate(pv, cb);
585#else
586 RT_NOREF(pVCpu, pv, cb);
587#endif
588}
589
590
591/**
592 * Frees executable memory.
593 */
594void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
595{
596 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
597 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
598 Assert(pv);
599#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
600 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
601#else
602 Assert(!((uintptr_t)pv & 63));
603#endif
604
605 /* Align the size as we did when allocating the block. */
606#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
607 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
608#else
609 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
610#endif
611
612 /* Free it / assert sanity. */
613#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
614 uint32_t const cChunks = pExecMemAllocator->cChunks;
615 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
616 bool fFound = false;
617 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
618 {
619 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
620 fFound = offChunk < cbChunk;
621 if (fFound)
622 {
623#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
624 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
625 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
626
627 /* Check that it's valid and free it. */
628 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
629 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
630 for (uint32_t i = 1; i < cReqUnits; i++)
631 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
632 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
633
634 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
635 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
636
637 /* Update the stats. */
638 pExecMemAllocator->cbAllocated -= cb;
639 pExecMemAllocator->cbFree += cb;
640 pExecMemAllocator->cAllocations -= 1;
641 return;
642#else
643 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
644 break;
645#endif
646 }
647 }
648# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
649 AssertFailed();
650# else
651 Assert(fFound);
652# endif
653#endif
654
655#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
656 /* Update stats while cb is freshly calculated. */
657 pExecMemAllocator->cbAllocated -= cb;
658 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
659 pExecMemAllocator->cAllocations -= 1;
660
661 /* Free it. */
662 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
663#endif
664}
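/* Editor's illustrative sketch (not called anywhere): the intended lifecycle of an
   executable allocation.  On darwin the memory comes back read+write but not
   executable, so the native code must be emitted before
   iemExecMemAllocatorReadyForUse flips it to read+exec and flushes the icache. */
#if 0
static void *iemExecMemExampleLifecycle(PVMCPUCC pVCpu, const uint8_t *pbCode, size_t cbCode)
{
    void *pv = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbCode);
    if (pv)
    {
        memcpy(pv, pbCode, cbCode);                        /* emit/copy the recompiled code */
        iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode); /* RW -> RX (+ icache flush on darwin) */
        /* ... execute the code; eventually: iemExecMemAllocatorFree(pVCpu, pv, cbCode); */
    }
    return pv;
}
#endif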
665
666
667
668#ifdef IN_RING3
669# ifdef RT_OS_WINDOWS
670
671/**
672 * Initializes the unwind info structures for windows hosts.
673 */
674static int
675iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
676 void *pvChunk, uint32_t idxChunk)
677{
678 RT_NOREF(pVCpu);
679
680 /*
681 * The AMD64 unwind opcodes.
682 *
683 * This is a program that starts with RSP after a RET instruction that
684 * ends up in recompiled code, and the operations we describe here will
685 * restore all non-volatile registers and bring RSP back to where our
686 * RET address is. This means it's reverse order from what happens in
687 * the prologue.
688 *
689 * Note! Using a frame register approach here both because we have one
690 * and mainly because the UWOP_ALLOC_LARGE argument values
691 * would be a pain to write initializers for. On the positive
692 * side, we're impervious to changes in the stack variable
693 * area and can deal with dynamic stack allocations if necessary.
694 */
695 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
696 {
697 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
698 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
699 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
700 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
701 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
702 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
703 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
704 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
705 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
706 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
707 };
708 union
709 {
710 IMAGE_UNWIND_INFO Info;
711 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
712 } s_UnwindInfo =
713 {
714 {
715 /* .Version = */ 1,
716 /* .Flags = */ 0,
717 /* .SizeOfProlog = */ 16, /* whatever */
718 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
719 /* .FrameRegister = */ X86_GREG_xBP,
720 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
721 }
722 };
723 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
724 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
725
726 /*
727 * Calc how much space we need and allocate it off the exec heap.
728 */
729 unsigned const cFunctionEntries = 1;
730 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
731 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
732# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
733 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
734 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
735 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
736# else
737 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
738 - pExecMemAllocator->cbHeapBlockHdr;
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
740 32 /*cbAlignment*/);
741# endif
742 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
743 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
744
745 /*
746 * Initialize the structures.
747 */
748 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
749
750 paFunctions[0].BeginAddress = 0;
751 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
752 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
753
754 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
755 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
756
757 /*
758 * Register it.
759 */
760 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
761 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
762
763 return VINF_SUCCESS;
764}
765
766
767# else /* !RT_OS_WINDOWS */
768
769/**
770 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
771 */
772DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
773{
774 if (iValue >= 64)
775 {
776 Assert(iValue < 0x2000);
777 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
778 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
779 }
780 else if (iValue >= 0)
781 *Ptr.pb++ = (uint8_t)iValue;
782 else if (iValue > -64)
783 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
784 else
785 {
786 Assert(iValue > -0x2000);
787 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
788 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
789 }
790 return Ptr;
791}
792
793
794/**
795 * Emits an ULEB128 encoded value (up to 64-bit wide).
796 */
797DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
798{
799 while (uValue >= 0x80)
800 {
801 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
802 uValue >>= 7;
803 }
804 *Ptr.pb++ = (uint8_t)uValue;
805 return Ptr;
806}
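/* Editor's worked examples (illustrative only) of the encodings produced by the
   two helpers above, matching the standard DWARF SLEB128/ULEB128 forms for the
   value ranges they support:
     iemDwarfPutUleb128(Ptr, 300) emits 0xac 0x02   ((300 & 0x7f) | 0x80, then 300 >> 7)
     iemDwarfPutLeb128(Ptr,  -8)  emits 0x78        (single byte negative form: (-8 & 0x3f) | 0x40)
     iemDwarfPutLeb128(Ptr, 130)  emits 0x82 0x01   (two byte positive form) */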
807
808
809/**
810 * Emits a CFA rule as register @a uReg + offset @a off.
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
813{
814 *Ptr.pb++ = DW_CFA_def_cfa;
815 Ptr = iemDwarfPutUleb128(Ptr, uReg);
816 Ptr = iemDwarfPutUleb128(Ptr, off);
817 return Ptr;
818}
819
820
821/**
822 * Emits a register (@a uReg) save location:
823 * CFA + @a off * data_alignment_factor
824 */
825DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
826{
827 if (uReg < 0x40)
828 *Ptr.pb++ = DW_CFA_offset | uReg;
829 else
830 {
831 *Ptr.pb++ = DW_CFA_offset_extended;
832 Ptr = iemDwarfPutUleb128(Ptr, uReg);
833 }
834 Ptr = iemDwarfPutUleb128(Ptr, off);
835 return Ptr;
836}
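/* Editor's worked example (illustrative only) of the byte sequences the two CFA
   helpers above emit, using the standard DWARF opcode values DW_CFA_def_cfa = 0x0c
   and DW_CFA_offset = 0x80 and the AMD64 DWARF register numbers (RBP = 6, RA = 16):
     iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) emits 0x0c 0x06 0x10  -> CFA = RBP + 16
     iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,   1) emits 0x90 0x01       -> RA at CFA + 1 * -8
   which is exactly what the CIE initial instructions below encode for AMD64. */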
837
838
839# if 0 /* unused */
840/**
841 * Emits a register (@a uReg) save location, using signed offset:
842 * CFA + @a offSigned * data_alignment_factor
843 */
844DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
845{
846 *Ptr.pb++ = DW_CFA_offset_extended_sf;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
849 return Ptr;
850}
851# endif
852
853
854/**
855 * Initializes the unwind info section for non-windows hosts.
856 */
857static int
858iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
859 void *pvChunk, uint32_t idxChunk)
860{
861 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
862 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
863
864 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
865
866 /*
867 * Generate the CIE first.
868 */
869# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
870 uint8_t const iDwarfVer = 3;
871# else
872 uint8_t const iDwarfVer = 4;
873# endif
874 RTPTRUNION const PtrCie = Ptr;
875 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
876 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
877 *Ptr.pb++ = iDwarfVer; /* DWARF version */
878 *Ptr.pb++ = 0; /* Augmentation. */
879 if (iDwarfVer >= 4)
880 {
881 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
882 *Ptr.pb++ = 0; /* Segment selector size. */
883 }
884# ifdef RT_ARCH_AMD64
885 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
886# else
887 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
888# endif
889 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
890# ifdef RT_ARCH_AMD64
891 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
892# elif defined(RT_ARCH_ARM64)
893 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
894# else
895# error "port me"
896# endif
897 /* Initial instructions: */
898# ifdef RT_ARCH_AMD64
899 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
907# elif defined(RT_ARCH_ARM64)
908# if 1
909 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
910# else
911 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
912# endif
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
925 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
926 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
927# else
928# error "port me"
929# endif
930 while ((Ptr.u - PtrCie.u) & 3)
931 *Ptr.pb++ = DW_CFA_nop;
932 /* Finalize the CIE size. */
933 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
934
935 /*
936 * Generate an FDE for the whole chunk area.
937 */
938# ifdef IEMNATIVE_USE_LIBUNWIND
939 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
940# endif
941 RTPTRUNION const PtrFde = Ptr;
942 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
943 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
944 Ptr.pu32++;
945 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
946 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
947# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
948 *Ptr.pb++ = DW_CFA_nop;
949# endif
950 while ((Ptr.u - PtrFde.u) & 3)
951 *Ptr.pb++ = DW_CFA_nop;
952 /* Finalize the FDE size. */
953 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
954
955 /* Terminator entry. */
956 *Ptr.pu32++ = 0;
957 *Ptr.pu32++ = 0; /* just to be sure... */
958 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
959
960 /*
961 * Register it.
962 */
963# ifdef IEMNATIVE_USE_LIBUNWIND
964 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
965# else
966 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
967 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
968# endif
969
970# ifdef IEMNATIVE_USE_GDB_JIT
971 /*
972 * Now for telling GDB about this (experimental).
973 *
974 * This seems to work best with ET_DYN.
975 */
976 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
977# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
978 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
979 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
980# else
981 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
982 - pExecMemAllocator->cbHeapBlockHdr;
983 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
984# endif
985 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
986 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
987
988 RT_ZERO(*pSymFile);
989
990 /*
991 * The ELF header:
992 */
993 pSymFile->EHdr.e_ident[0] = ELFMAG0;
994 pSymFile->EHdr.e_ident[1] = ELFMAG1;
995 pSymFile->EHdr.e_ident[2] = ELFMAG2;
996 pSymFile->EHdr.e_ident[3] = ELFMAG3;
997 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
998 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
999 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1000 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1001# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1002 pSymFile->EHdr.e_type = ET_DYN;
1003# else
1004 pSymFile->EHdr.e_type = ET_REL;
1005# endif
1006# ifdef RT_ARCH_AMD64
1007 pSymFile->EHdr.e_machine = EM_AMD64;
1008# elif defined(RT_ARCH_ARM64)
1009 pSymFile->EHdr.e_machine = EM_AARCH64;
1010# else
1011# error "port me"
1012# endif
1013 pSymFile->EHdr.e_version = 1; /*?*/
1014 pSymFile->EHdr.e_entry = 0;
1015# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1016 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1017# else
1018 pSymFile->EHdr.e_phoff = 0;
1019# endif
1020 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1021 pSymFile->EHdr.e_flags = 0;
1022 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1023# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1024 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1025 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phentsize = 0;
1028 pSymFile->EHdr.e_phnum = 0;
1029# endif
1030 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1031 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1032 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1033
1034 uint32_t offStrTab = 0;
1035#define APPEND_STR(a_szStr) do { \
1036 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1037 offStrTab += sizeof(a_szStr); \
1038 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1039 } while (0)
1040#define APPEND_STR_FMT(a_szStr, ...) do { \
1041 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1042 offStrTab++; \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045
1046 /*
1047 * Section headers.
1048 */
1049 /* Section header #0: NULL */
1050 unsigned i = 0;
1051 APPEND_STR("");
1052 RT_ZERO(pSymFile->aShdrs[i]);
1053 i++;
1054
1055 /* Section header: .eh_frame */
1056 pSymFile->aShdrs[i].sh_name = offStrTab;
1057 APPEND_STR(".eh_frame");
1058 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1059 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1060# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1061 pSymFile->aShdrs[i].sh_offset
1062 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1063# else
1064 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1065 pSymFile->aShdrs[i].sh_offset = 0;
1066# endif
1067
1068 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1069 pSymFile->aShdrs[i].sh_link = 0;
1070 pSymFile->aShdrs[i].sh_info = 0;
1071 pSymFile->aShdrs[i].sh_addralign = 1;
1072 pSymFile->aShdrs[i].sh_entsize = 0;
1073 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1074 i++;
1075
1076 /* Section header: .shstrtab */
1077 unsigned const iShStrTab = i;
1078 pSymFile->EHdr.e_shstrndx = iShStrTab;
1079 pSymFile->aShdrs[i].sh_name = offStrTab;
1080 APPEND_STR(".shstrtab");
1081 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1082 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1083# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1084 pSymFile->aShdrs[i].sh_offset
1085 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1086# else
1087 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1088 pSymFile->aShdrs[i].sh_offset = 0;
1089# endif
1090 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1091 pSymFile->aShdrs[i].sh_link = 0;
1092 pSymFile->aShdrs[i].sh_info = 0;
1093 pSymFile->aShdrs[i].sh_addralign = 1;
1094 pSymFile->aShdrs[i].sh_entsize = 0;
1095 i++;
1096
1097 /* Section header: .symbols */
1098 pSymFile->aShdrs[i].sh_name = offStrTab;
1099 APPEND_STR(".symtab");
1100 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1101 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1102 pSymFile->aShdrs[i].sh_offset
1103 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1104 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1105 pSymFile->aShdrs[i].sh_link = iShStrTab;
1106 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1107 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1108 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1109 i++;
1110
1111# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1112 /* Section header: .dynsym */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".dynsym");
1115 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1124 i++;
1125# endif
1126
1127# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1128 /* Section header: .dynamic */
1129 pSymFile->aShdrs[i].sh_name = offStrTab;
1130 APPEND_STR(".dynamic");
1131 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1132 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1133 pSymFile->aShdrs[i].sh_offset
1134 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1135 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1136 pSymFile->aShdrs[i].sh_link = iShStrTab;
1137 pSymFile->aShdrs[i].sh_info = 0;
1138 pSymFile->aShdrs[i].sh_addralign = 1;
1139 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1140 i++;
1141# endif
1142
1143 /* Section header: .text */
1144 unsigned const iShText = i;
1145 pSymFile->aShdrs[i].sh_name = offStrTab;
1146 APPEND_STR(".text");
1147 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1148 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1149# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1150 pSymFile->aShdrs[i].sh_offset
1151 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1152# else
1153 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1154 pSymFile->aShdrs[i].sh_offset = 0;
1155# endif
1156 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1157 pSymFile->aShdrs[i].sh_link = 0;
1158 pSymFile->aShdrs[i].sh_info = 0;
1159 pSymFile->aShdrs[i].sh_addralign = 1;
1160 pSymFile->aShdrs[i].sh_entsize = 0;
1161 i++;
1162
1163 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1164
1165# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1166 /*
1167 * The program headers:
1168 */
1169 /* Everything in a single LOAD segment: */
1170 i = 0;
1171 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1172 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1173 pSymFile->aPhdrs[i].p_offset
1174 = pSymFile->aPhdrs[i].p_vaddr
1175 = pSymFile->aPhdrs[i].p_paddr = 0;
1176 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1177 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1178 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1179 i++;
1180 /* The .dynamic segment. */
1181 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1182 pSymFile->aPhdrs[i].p_flags = PF_R;
1183 pSymFile->aPhdrs[i].p_offset
1184 = pSymFile->aPhdrs[i].p_vaddr
1185 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1186 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1187 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1188 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1189 i++;
1190
1191 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1192
1193 /*
1194 * The dynamic section:
1195 */
1196 i = 0;
1197 pSymFile->aDyn[i].d_tag = DT_SONAME;
1198 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1199 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1200 i++;
1201 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1202 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1205 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1206 i++;
1207 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1208 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1211 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_NULL;
1214 i++;
1215 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1216# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1217
1218 /*
1219 * Symbol tables:
1220 */
1221 /** @todo gdb doesn't seem to really like this ... */
1222 i = 0;
1223 pSymFile->aSymbols[i].st_name = 0;
1224 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1225 pSymFile->aSymbols[i].st_value = 0;
1226 pSymFile->aSymbols[i].st_size = 0;
1227 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1228 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1229# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1230 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1231# endif
1232 i++;
1233
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240 i++;
1241
1242 pSymFile->aSymbols[i].st_name = offStrTab;
1243 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1244# if 0
1245 pSymFile->aSymbols[i].st_shndx = iShText;
1246 pSymFile->aSymbols[i].st_value = 0;
1247# else
1248 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1249 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1250# endif
1251 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1252 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1253 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1254# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1255 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1256 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1257# endif
1258 i++;
1259
1260 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1261 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1262
1263 /*
1264 * The GDB JIT entry and informing GDB.
1265 */
1266 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1267# if 1
1268 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1269# else
1270 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1271# endif
1272
1273 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1274 RTCritSectEnter(&g_IemNativeGdbJitLock);
1275 pEhFrame->GdbJitEntry.pNext = NULL;
1276 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1277 if (__jit_debug_descriptor.pTail)
1278 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1279 else
1280 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1281 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1282 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1283
1284 /* Notify GDB: */
1285 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1286 __jit_debug_register_code();
1287 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1288 RTCritSectLeave(&g_IemNativeGdbJitLock);
1289
1290# else /* !IEMNATIVE_USE_GDB_JIT */
1291 RT_NOREF(pVCpu);
1292# endif /* !IEMNATIVE_USE_GDB_JIT */
1293
1294 return VINF_SUCCESS;
1295}
1296
1297# endif /* !RT_OS_WINDOWS */
1298#endif /* IN_RING3 */
1299
1300
1301/**
1302 * Adds another chunk to the executable memory allocator.
1303 *
1304 * This is used by the init code for the initial allocation and later by the
1305 * regular allocator function when it's out of memory.
1306 */
1307static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1308{
1309 /* Check that we've room for growth. */
1310 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1311 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1312
1313 /* Allocate a chunk. */
1314#ifdef RT_OS_DARWIN
1315 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1316#else
1317 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1318#endif
1319 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1320
1321#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1322 int rc = VINF_SUCCESS;
1323#else
1324 /* Initialize the heap for the chunk. */
1325 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1326 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1327 AssertRC(rc);
1328 if (RT_SUCCESS(rc))
1329 {
1330 /*
1331 * We want the memory to be aligned on 64 bytes, so the first time thru
1332 * here we do some exploratory allocations to see how we can achieve this.
1333 * On subsequent runs we only make an initial adjustment allocation, if
1334 * necessary.
1335 *
1336 * Since we own the heap implementation, we know that the internal block
1337 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1338 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1339 * to the size, align up by 64 bytes, and subtract 32 bytes.
1340 *
1341 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1342 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1343 * allocation to force subsequent allocations to return 64 byte aligned
1344 * user areas.
1345 */
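 /* Editor's worked example (illustrative only): a 100 byte request is adjusted
    to RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so the 32 byte block header
    plus the 160 byte user area span exactly three 64 byte lines and the next
    block's user area starts 64 byte aligned again. */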
1346 if (!pExecMemAllocator->cbHeapBlockHdr)
1347 {
1348 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1349 pExecMemAllocator->cbHeapAlignTweak = 64;
1350 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1351 32 /*cbAlignment*/);
1352 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1353
1354 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1355 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1356 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1357 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1358 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1359
1360 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1361 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1362 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1363 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1364 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1365
1366 RTHeapSimpleFree(hHeap, pvTest2);
1367 RTHeapSimpleFree(hHeap, pvTest1);
1368 }
1369 else
1370 {
1371 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1372 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1373 }
1374 if (RT_SUCCESS(rc))
1375#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1376 {
1377 /*
1378 * Add the chunk.
1379 *
1380 * This must be done before the unwind init so windows can allocate
1381 * memory from the chunk when using the alternative sub-allocator.
1382 */
1383 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1384#ifdef IN_RING3
1385 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1386#endif
1387#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1388 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1389#else
1390 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1391 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1392 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1393 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1394#endif
1395
1396 pExecMemAllocator->cChunks = idxChunk + 1;
1397 pExecMemAllocator->idxChunkHint = idxChunk;
1398
1399#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1400 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1401 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1402#else
1403 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1404 pExecMemAllocator->cbTotal += cbFree;
1405 pExecMemAllocator->cbFree += cbFree;
1406#endif
1407
1408#ifdef IN_RING3
1409 /*
1410 * Initialize the unwind information (this cannot really fail atm).
1411 * (This sets pvUnwindInfo.)
1412 */
1413 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1414 if (RT_SUCCESS(rc))
1415#endif
1416 {
1417 return VINF_SUCCESS;
1418 }
1419
1420#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1421 /* Just in case the impossible happens, undo the above: */
1422 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1423 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1424 pExecMemAllocator->cChunks = idxChunk;
1425 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1426 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1427 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1428 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1429#endif
1430 }
1431#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 }
1433#endif
1434 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1435 RT_NOREF(pVCpu);
1436 return rc;
1437}
1438
1439
1440/**
1441 * Initializes the executable memory allocator for native recompilation on the
1442 * calling EMT.
1443 *
1444 * @returns VBox status code.
1445 * @param pVCpu The cross context virtual CPU structure of the calling
1446 * thread.
1447 * @param cbMax The max size of the allocator.
1448 * @param cbInitial The initial allocator size.
1449 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1450 * dependent).
1451 */
1452int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1453{
1454 /*
1455 * Validate input.
1456 */
1457 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1458 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1459 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1460 || cbChunk == 0
1461 || ( RT_IS_POWER_OF_TWO(cbChunk)
1462 && cbChunk >= _1M
1463 && cbChunk <= _256M
1464 && cbChunk <= cbMax),
1465 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1466 VERR_OUT_OF_RANGE);
1467
1468 /*
1469 * Adjust/figure out the chunk size.
1470 */
1471 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1472 {
1473 if (cbMax >= _256M)
1474 cbChunk = _64M;
1475 else
1476 {
1477 if (cbMax < _16M)
1478 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1479 else
1480 cbChunk = (uint32_t)cbMax / 4;
1481 if (!RT_IS_POWER_OF_TWO(cbChunk))
1482 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1483 }
1484 }
1485
1486 if (cbChunk > cbMax)
1487 cbMax = cbChunk;
1488 else
1489 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1490 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1491 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
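 /* Editor's worked example (illustrative only): with cbMax = 64 MB and cbChunk = 0,
    the code above picks cbChunk = 64 MB / 4 = 16 MB (already a power of two),
    leaves cbMax at 64 MB, and ends up with cMaxChunks = 4. */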
1492
1493 /*
1494 * Allocate and initialize the allocator instance.
1495 */
1496 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1497#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1498 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1499 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1500 cbNeeded += cbBitmap * cMaxChunks;
1501 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1502 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1503#endif
1504#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1505 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1506 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1507#endif
1508 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1509 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1510 VERR_NO_MEMORY);
1511 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1512 pExecMemAllocator->cbChunk = cbChunk;
1513 pExecMemAllocator->cMaxChunks = cMaxChunks;
1514 pExecMemAllocator->cChunks = 0;
1515 pExecMemAllocator->idxChunkHint = 0;
1516 pExecMemAllocator->cAllocations = 0;
1517 pExecMemAllocator->cbTotal = 0;
1518 pExecMemAllocator->cbFree = 0;
1519 pExecMemAllocator->cbAllocated = 0;
1520#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1521 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1522 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1523 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1524 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1525#endif
1526#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1527 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1528#endif
1529 for (uint32_t i = 0; i < cMaxChunks; i++)
1530 {
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1533 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1534#else
1535 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1536#endif
1537 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1538#ifdef IN_RING0
1539 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1540#else
1541 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1542#endif
1543 }
1544 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1545
1546 /*
1547 * Do the initial allocations.
1548 */
1549 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1550 {
1551 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1552 AssertLogRelRCReturn(rc, rc);
1553 }
1554
1555 pExecMemAllocator->idxChunkHint = 0;
1556
1557 return VINF_SUCCESS;
1558}
1559
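/*
 * Illustrative call (hypothetical values; the real caller and its tuning live
 * elsewhere in IEM and may differ):
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, 64*_1M, _1M, 0);    (0 or UINT32_MAX = default chunk size)
 *      AssertRCReturn(rc, rc);
 */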
1560
1561/*********************************************************************************************************************************
1562* Native Recompilation *
1563*********************************************************************************************************************************/
1564
1565
1566/**
1567 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1568 */
1569IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1570{
1571 pVCpu->iem.s.cInstructions += idxInstr;
1572 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1573}
1574
1575
1576/**
1577 * Used by TB code when it wants to raise a \#GP(0).
1578 */
1579IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1580{
1581 pVCpu->iem.s.cInstructions += idxInstr;
1582 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1583#ifndef _MSC_VER
1584 return VINF_IEM_RAISED_XCPT; /* not reached */
1585#endif
1586}
1587
1588
1589/**
1590 * Reinitializes the native recompiler state.
1591 *
1592 * Called before starting a new recompile job.
1593 */
1594static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1595{
1596 pReNative->cLabels = 0;
1597 pReNative->bmLabelTypes = 0;
1598 pReNative->cFixups = 0;
1599#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1600 pReNative->pDbgInfo->cEntries = 0;
1601#endif
1602 pReNative->pTbOrg = pTb;
1603 pReNative->cCondDepth = 0;
1604 pReNative->uCondSeqNo = 0;
1605 pReNative->uCheckIrqSeqNo = 0;
1606
1607 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1608#if IEMNATIVE_HST_GREG_COUNT < 32
1609 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1610#endif
1611 ;
1612 pReNative->Core.bmHstRegsWithGstShadow = 0;
1613 pReNative->Core.bmGstRegShadows = 0;
1614 pReNative->Core.bmVars = 0;
1615 pReNative->Core.bmStack = 0;
1616 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1617 pReNative->Core.u64ArgVars = UINT64_MAX;
1618
1619 /* Full host register reinit: */
1620 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1621 {
1622 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1623 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1624 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1625 }
1626
1627 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1628 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1629#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1630 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1631#endif
1632#ifdef IEMNATIVE_REG_FIXED_TMP0
1633 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1634#endif
1635 );
1636 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1637 {
1638 fRegs &= ~RT_BIT_32(idxReg);
1639 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1640 }
1641
1642 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1643#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1644 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1645#endif
1646#ifdef IEMNATIVE_REG_FIXED_TMP0
1647 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1648#endif
1649 return pReNative;
1650}
1651
1652
1653/**
1654 * Allocates and initializes the native recompiler state.
1655 *
1656 * This is called the first time an EMT wants to recompile something.
1657 *
1658 * @returns Pointer to the new recompiler state.
1659 * @param pVCpu The cross context virtual CPU structure of the calling
1660 * thread.
1661 * @param pTb The TB that's about to be recompiled.
1662 * @thread EMT(pVCpu)
1663 */
1664static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1665{
1666 VMCPU_ASSERT_EMT(pVCpu);
1667
1668 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1669 AssertReturn(pReNative, NULL);
1670
1671 /*
1672 * Try allocate all the buffers and stuff we need.
1673 */
1674 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1675 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1676 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1677#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1678 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1679#endif
1680 if (RT_LIKELY( pReNative->pInstrBuf
1681 && pReNative->paLabels
1682 && pReNative->paFixups)
1683#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1684 && pReNative->pDbgInfo
1685#endif
1686 )
1687 {
1688 /*
1689 * Set the buffer & array sizes on success.
1690 */
1691 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1692 pReNative->cLabelsAlloc = _8K;
1693 pReNative->cFixupsAlloc = _16K;
1694#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1695 pReNative->cDbgInfoAlloc = _16K;
1696#endif
1697
1698 /*
1699 * Done, just need to save it and reinit it.
1700 */
1701 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1702 return iemNativeReInit(pReNative, pTb);
1703 }
1704
1705 /*
1706 * Failed. Cleanup and return.
1707 */
1708 AssertFailed();
1709 RTMemFree(pReNative->pInstrBuf);
1710 RTMemFree(pReNative->paLabels);
1711 RTMemFree(pReNative->paFixups);
1712#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1713 RTMemFree(pReNative->pDbgInfo);
1714#endif
1715 RTMemFree(pReNative);
1716 return NULL;
1717}
1718
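/*
 * Sketch of how a recompile job would typically obtain the state (the actual
 * entry point doing this lives elsewhere and may differ in detail):
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      pReNative = pReNative ? iemNativeReInit(pReNative, pTb) : iemNativeInit(pVCpu, pTb);
 *      if (!pReNative)
 *          ...out of memory, bail out...
 */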
1719
1720/**
1721 * Creates a label
1722 *
1723 * If the label does not yet have a defined position,
1724 * call iemNativeLabelDefine() later to set it.
1725 *
1726 * @returns Label ID. Throws VBox status code on failure, so no need to check
1727 * the return value.
1728 * @param pReNative The native recompile state.
1729 * @param enmType The label type.
1730 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1731 * label is not yet defined (default).
1732 * @param uData Data associated with the label. Only applicable to
1733 * certain type of labels. Default is zero.
1734 */
1735DECL_HIDDEN_THROW(uint32_t)
1736iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1737 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1738{
1739 /*
1740 * Locate existing label definition.
1741 *
1742 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1743 * and uData is zero.
1744 */
1745 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1746 uint32_t const cLabels = pReNative->cLabels;
1747 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1748#ifndef VBOX_STRICT
1749 && offWhere == UINT32_MAX
1750 && uData == 0
1751#endif
1752 )
1753 {
1754 /** @todo Since this is only used for labels with uData = 0, just use a
1755 * lookup array? */
1756 for (uint32_t i = 0; i < cLabels; i++)
1757 if ( paLabels[i].enmType == enmType
1758 && paLabels[i].uData == uData)
1759 {
1760#ifdef VBOX_STRICT
1761 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1762 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1763#endif
1764 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1765 return i;
1766 }
1767 }
1768
1769 /*
1770 * Make sure we've got room for another label.
1771 */
1772 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1773 { /* likely */ }
1774 else
1775 {
1776 uint32_t cNew = pReNative->cLabelsAlloc;
1777 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1778 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1779 cNew *= 2;
1780 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
1781 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1782 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1783 pReNative->paLabels = paLabels;
1784 pReNative->cLabelsAlloc = cNew;
1785 }
1786
1787 /*
1788 * Define a new label.
1789 */
1790 paLabels[cLabels].off = offWhere;
1791 paLabels[cLabels].enmType = enmType;
1792 paLabels[cLabels].uData = uData;
1793 pReNative->cLabels = cLabels + 1;
1794
1795 Assert((unsigned)enmType < 64);
1796 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1797
1798 if (offWhere != UINT32_MAX)
1799 {
1800#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1801 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1802 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1803#endif
1804 }
1805 return cLabels;
1806}
1807
1808
1809/**
1810 * Defines the location of an existing label.
1811 *
1812 * @param pReNative The native recompile state.
1813 * @param idxLabel The label to define.
1814 * @param offWhere The position.
1815 */
1816DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1817{
1818 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1819 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1820 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1821 pLabel->off = offWhere;
1822#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1823 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1824 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1825#endif
1826}
1827
1828
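/*
 * Typical forward-label flow (illustrative sketch; the label type shown is
 * just an example):
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *      ...emit code that references idxLabel via fixups (see iemNativeAddFixup below)...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */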
1829/**
1830 * Looks up a label.
1831 *
1832 * @returns Label ID if found, UINT32_MAX if not.
1833 */
1834static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1835 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1836{
1837 Assert((unsigned)enmType < 64);
1838 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1839 {
1840 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1841 uint32_t const cLabels = pReNative->cLabels;
1842 for (uint32_t i = 0; i < cLabels; i++)
1843 if ( paLabels[i].enmType == enmType
1844 && paLabels[i].uData == uData
1845 && ( paLabels[i].off == offWhere
1846 || offWhere == UINT32_MAX
1847 || paLabels[i].off == UINT32_MAX))
1848 return i;
1849 }
1850 return UINT32_MAX;
1851}
1852
1853
1854/**
1855 * Adds a fixup.
1856 *
1857 * @throws VBox status code (int) on failure.
1858 * @param pReNative The native recompile state.
1859 * @param offWhere The instruction offset of the fixup location.
1860 * @param idxLabel The target label ID for the fixup.
1861 * @param enmType The fixup type.
1862 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1863 */
1864DECL_HIDDEN_THROW(void)
1865iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1866 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1867{
1868 Assert(idxLabel <= UINT16_MAX);
1869 Assert((unsigned)enmType <= UINT8_MAX);
1870
1871 /*
1872 * Make sure we've room.
1873 */
1874 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1875 uint32_t const cFixups = pReNative->cFixups;
1876 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1877 { /* likely */ }
1878 else
1879 {
1880 uint32_t cNew = pReNative->cFixupsAlloc;
1881 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1882 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1883 cNew *= 2;
1884 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1885 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1886 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1887 pReNative->paFixups = paFixups;
1888 pReNative->cFixupsAlloc = cNew;
1889 }
1890
1891 /*
1892 * Add the fixup.
1893 */
1894 paFixups[cFixups].off = offWhere;
1895 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1896 paFixups[cFixups].enmType = enmType;
1897 paFixups[cFixups].offAddend = offAddend;
1898 pReNative->cFixups = cFixups + 1;
1899}
1900
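/*
 * Illustrative use (sketch): record that the displacement emitted at offFixup
 * must be patched to reach idxLabel once it is defined. The fixup type and the
 * -4 addend shown follow the common AMD64 rel32 pattern and are assumptions
 * here, not taken from this file:
 *
 *      iemNativeAddFixup(pReNative, offFixup, idxLabel, kIemNativeFixupType_Rel32, -4);
 */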
1901
1902/**
1903 * Slow code path for iemNativeInstrBufEnsure.
1904 */
1905DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1906{
1907 /* Double the buffer size till we meet the request. */
1908 uint32_t cNew = pReNative->cInstrBufAlloc;
1909 AssertReturn(cNew > 0, NULL);
1910 do
1911 cNew *= 2;
1912 while (cNew < off + cInstrReq);
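    /* Example (illustrative): with cInstrBufAlloc = 16K entries and a request
       for 64 instructions at off = 40000, cNew doubles 16K -> 32K -> 64K. */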
1913
1914 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1915#ifdef RT_ARCH_ARM64
1916 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1917#else
1918 uint32_t const cbMaxInstrBuf = _2M;
1919#endif
1920 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1921
1922 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1923 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1924
1925 pReNative->cInstrBufAlloc = cNew;
1926 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1927}
1928
1929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1930
1931/**
1932 * Grows the static debug info array used during recompilation.
1933 *
1934 * @returns Pointer to the new debug info block; throws VBox status code on
1935 * failure, so no need to check the return value.
1936 */
1937DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1938{
1939 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1940 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1941 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1942 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1943 pReNative->pDbgInfo = pDbgInfo;
1944 pReNative->cDbgInfoAlloc = cNew;
1945 return pDbgInfo;
1946}
1947
1948
1949/**
1950 * Adds a new, uninitialized debug info entry, returning the pointer to it.
1951 */
1952DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1953{
1954 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1955 { /* likely */ }
1956 else
1957 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1958 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1959}
1960
1961
1962/**
1963 * Debug Info: Adds a native offset record, if necessary.
1964 */
1965static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1966{
1967 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1968
1969 /*
1970 * Search backwards to see if we've got a similar record already.
1971 */
1972 uint32_t idx = pDbgInfo->cEntries;
1973 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1974 while (idx-- > idxStop)
1975 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1976 {
1977 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1978 return;
1979 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1980 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1981 break;
1982 }
1983
1984 /*
1985 * Add it.
1986 */
1987 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1988 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1989 pEntry->NativeOffset.offNative = off;
1990}
1991
1992
1993/**
1994 * Debug Info: Record info about a label.
1995 */
1996static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
1997{
1998 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1999 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2000 pEntry->Label.uUnused = 0;
2001 pEntry->Label.enmLabel = (uint8_t)enmType;
2002 pEntry->Label.uData = uData;
2003}
2004
2005
2006/**
2007 * Debug Info: Record info about a threaded call.
2008 */
2009static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2010{
2011 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2012 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2013 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2014 pEntry->ThreadedCall.uUnused = 0;
2015 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2016}
2017
2018
2019/**
2020 * Debug Info: Record info about a new guest instruction.
2021 */
2022static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2023{
2024 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2025 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2026 pEntry->GuestInstruction.uUnused = 0;
2027 pEntry->GuestInstruction.fExec = fExec;
2028}
2029
2030
2031/**
2032 * Debug Info: Record info about guest register shadowing.
2033 */
2034static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2035 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2036{
2037 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2038 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2039 pEntry->GuestRegShadowing.uUnused = 0;
2040 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2041 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2042 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2043}
2044
2045#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2046
2047
2048/*********************************************************************************************************************************
2049* Register Allocator *
2050*********************************************************************************************************************************/
2051
2052/**
2053 * Register parameter indexes (indexed by argument number).
2054 */
2055DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2056{
2057 IEMNATIVE_CALL_ARG0_GREG,
2058 IEMNATIVE_CALL_ARG1_GREG,
2059 IEMNATIVE_CALL_ARG2_GREG,
2060 IEMNATIVE_CALL_ARG3_GREG,
2061#if defined(IEMNATIVE_CALL_ARG4_GREG)
2062 IEMNATIVE_CALL_ARG4_GREG,
2063# if defined(IEMNATIVE_CALL_ARG5_GREG)
2064 IEMNATIVE_CALL_ARG5_GREG,
2065# if defined(IEMNATIVE_CALL_ARG6_GREG)
2066 IEMNATIVE_CALL_ARG6_GREG,
2067# if defined(IEMNATIVE_CALL_ARG7_GREG)
2068 IEMNATIVE_CALL_ARG7_GREG,
2069# endif
2070# endif
2071# endif
2072#endif
2073};
2074
2075/**
2076 * Call register masks indexed by argument count.
2077 */
2078DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2079{
2080 0,
2081 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2082 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2083 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2084 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2085 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2086#if defined(IEMNATIVE_CALL_ARG4_GREG)
2087 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2088 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2089# if defined(IEMNATIVE_CALL_ARG5_GREG)
2090 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2091 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2092# if defined(IEMNATIVE_CALL_ARG6_GREG)
2093 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2094 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2095 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2096# if defined(IEMNATIVE_CALL_ARG7_GREG)
2097 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2098 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2099 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2100# endif
2101# endif
2102# endif
2103#endif
2104};
2105
2106#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2107/**
2108 * BP offset of the stack argument slots.
2109 *
2110 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2111 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2112 */
2113DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2114{
2115 IEMNATIVE_FP_OFF_STACK_ARG0,
2116# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2117 IEMNATIVE_FP_OFF_STACK_ARG1,
2118# endif
2119# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2120 IEMNATIVE_FP_OFF_STACK_ARG2,
2121# endif
2122# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2123 IEMNATIVE_FP_OFF_STACK_ARG3,
2124# endif
2125};
2126AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2127#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2128
2129/**
2130 * Info about shadowed guest register values.
2131 * @see IEMNATIVEGSTREG
2132 */
2133static struct
2134{
2135 /** Offset in VMCPU. */
2136 uint32_t off;
2137 /** The field size. */
2138 uint8_t cb;
2139 /** Name (for logging). */
2140 const char *pszName;
2141} const g_aGstShadowInfo[] =
2142{
2143#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2144 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2145 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2146 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2147 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2148 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2151 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2152 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2153 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2154 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2155 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2156 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2157 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2158 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2159 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2160 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2161 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2162 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2163 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2164 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2165 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2166 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2167 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2168 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2169 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2170 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2171 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2172 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2173 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2174 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2175 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2176 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2177 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2178 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2179 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2180#undef CPUMCTX_OFF_AND_SIZE
2181};
2182AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2183
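/*
 * Each entry gives the byte offset (relative to VMCPU) and size of the
 * shadowed field, e.g. (illustrative):
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[kIemNativeGstReg_Pc].off;
 *      uint8_t  const cbField = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;
 *      ...emit a cbField byte load from pVCpu + offVCpu into the shadow register...
 */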
2184
2185/** Host CPU general purpose register names. */
2186DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2187{
2188#ifdef RT_ARCH_AMD64
2189 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2190#elif defined(RT_ARCH_ARM64)
2191 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2192 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2193#else
2194# error "port me"
2195#endif
2196};
2197
2198
2199DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2200 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2201{
2202 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2203
2204 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2205 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2206 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2207 return (uint8_t)idxReg;
2208}
2209
2210
2211/**
2212 * Tries to locate a suitable register in the given register mask.
2213 *
2214 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2215 * failed.
2216 *
2217 * @returns Host register number on success, returns UINT8_MAX on failure.
2218 */
2219static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2220{
2221 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2222 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2223 if (fRegs)
2224 {
2225 /** @todo pick better here: */
2226 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2227
2228 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2229 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2230 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2231 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2232
2233 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2234 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2235 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2236 return idxReg;
2237 }
2238 return UINT8_MAX;
2239}
2240
2241
2242/**
2243 * Locate a register, possibly freeing one up.
2244 *
2245 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2246 * failed.
2247 *
2248 * @returns Host register number on success. Returns UINT8_MAX if no registers
2249 * found, the caller is supposed to deal with this and raise an
2250 * allocation type specific status code (if desired).
2251 *
2252 * @throws VBox status code if we run into trouble spilling a variable or
2253 * recording debug info. Does NOT throw anything if we're out of
2254 * registers, though.
2255 */
2256static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2257 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2258{
2259 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2260 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2261
2262 /*
2263 * Try a freed register that's shadowing a guest register
2264 */
2265 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2266 if (fRegs)
2267 {
2268 unsigned const idxReg = (fPreferVolatile
2269 ? ASMBitFirstSetU32(fRegs)
2270 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2271 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2272 - 1;
2273
2274 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2275 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2276 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2277 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2278
2279 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2280 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2281 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2282 return idxReg;
2283 }
2284
2285 /*
2286 * Try free up a variable that's in a register.
2287 *
2288 * We do two rounds here: first we evacuate variables that don't need to be
2289 * saved on the stack, then in the second round we move things to the stack.
2290 */
2291 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2292 {
2293 uint32_t fVars = pReNative->Core.bmVars;
2294 while (fVars)
2295 {
2296 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2297 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2298 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2299 && (RT_BIT_32(idxReg) & fRegMask)
2300 && ( iLoop == 0
2301 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2302 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2303 {
2304 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2305 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2306 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2307 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2308 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2309
2310 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2311 {
2312 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
2313 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2314 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff,
2315 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2316 - IEMNATIVE_FP_OFF_STACK_VARS,
2317 idxReg);
2318 }
2319
2320 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2321 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2322 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2323 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2324 return idxReg;
2325 }
2326 fVars &= ~RT_BIT_32(idxVar);
2327 }
2328 }
2329
2330 return UINT8_MAX;
2331}
2332
2333
2334/**
2335 * Moves a variable to a different register or spills it onto the stack.
2336 *
2337 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2338 * kinds can easily be recreated if needed later.
2339 *
2340 * @returns The new code buffer position, UINT32_MAX on failure.
2341 * @param pReNative The native recompile state.
2342 * @param off The current code buffer position.
2343 * @param idxVar The variable index.
2344 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2345 * call-volatile registers.
2346 */
2347static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2348 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2349{
2350 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2351 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2352
2353 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2354 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2355 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2356 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2357 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2358 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2359 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2360 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2361
2362
2363 /** @todo Add statistics on this.*/
2364 /** @todo Implement basic variable liveness analysis (python) so variables
2365 * can be freed immediately once no longer used. Without that we may be
2366 * trashing registers and stack slots for dead variables. */
2367
2368 /*
2369 * First try move it to a different register, as that's cheaper.
2370 */
2371 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2372 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2373 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2374 if (fRegs)
2375 {
2376 /* Avoid using shadow registers, if possible. */
2377 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2378 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2379 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2380
2381 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2382 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2383 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2384 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2385 if (fGstRegShadows)
2386 {
2387 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2388 while (fGstRegShadows)
2389 {
2390 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2391 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2392
2393 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2394 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2395 }
2396 }
2397
2398 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2399 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2400 }
2401 /*
2402 * Otherwise we must spill the register onto the stack.
2403 */
2404 else
2405 {
2406 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2407 off = iemNativeEmitStoreGprByBp(pReNative, off,
2408 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2409 - IEMNATIVE_FP_OFF_STACK_VARS,
2410 idxRegOld);
2411
2412 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2413 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2414 }
2415
2416 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2417 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2418 return off;
2419}
2420
2421
2422/**
2423 * Allocates a temporary host general purpose register.
2424 *
2425 * This may emit code to save register content onto the stack in order to free
2426 * up a register.
2427 *
2428 * @returns The host register number; throws VBox status code on failure,
2429 * so no need to check the return value.
2430 * @param pReNative The native recompile state.
2431 * @param poff Pointer to the variable with the code buffer position.
2432 * This will be updated if we need to move a variable from
2433 * register to stack in order to satisfy the request.
2434 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2435 * registers (@c true, default) or the other way around
2436 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2437 */
2438DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2439{
2440 /*
2441 * Try find a completely unused register, preferably a call-volatile one.
2442 */
2443 uint8_t idxReg;
2444 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2445 & ~pReNative->Core.bmHstRegsWithGstShadow
2446 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2447 if (fRegs)
2448 {
2449 if (fPreferVolatile)
2450 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2451 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2452 else
2453 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2454 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2455 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2456 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2457 }
2458 else
2459 {
2460 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2461 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2462 }
2463 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2464}
2465
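/*
 * Illustrative usage (sketch):
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      ...emit code using idxTmpReg as scratch...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */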
2466
2467/**
2468 * Allocates a temporary register for loading an immediate value into.
2469 *
2470 * This will emit code to load the immediate, unless there happens to be an
2471 * unused register with the value already loaded.
2472 *
2473 * The caller will not modify the returned register, it must be considered
2474 * read-only. Free using iemNativeRegFreeTmpImm.
2475 *
2476 * @returns The host register number; throws VBox status code on failure, so no
2477 * need to check the return value.
2478 * @param pReNative The native recompile state.
2479 * @param poff Pointer to the variable with the code buffer position.
2480 * @param uImm The immediate value that the register must hold upon
2481 * return.
2482 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2483 * registers (@c true, default) or the other way around
2484 * (@c false).
2485 *
2486 * @note Reusing immediate values has not been implemented yet.
2487 */
2488DECL_HIDDEN_THROW(uint8_t)
2489iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2490{
2491 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2492 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2493 return idxReg;
2494}
2495
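/*
 * Illustrative usage (sketch; the constant is just an example):
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffffffffffff0000));
 *      ...use idxRegImm as a read-only constant in emitted code...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */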
2496
2497/**
2498 * Marks host register @a idxHstReg as containing a shadow copy of guest
2499 * register @a enmGstReg.
2500 *
2501 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2502 * host register before calling.
2503 */
2504DECL_FORCE_INLINE(void)
2505iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2506{
2507 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2508
2509 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2510 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2511 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2512 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2513#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2514 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2515 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2516#else
2517 RT_NOREF(off);
2518#endif
2519}
2520
2521
2522/**
2523 * Clear any guest register shadow claims from @a idxHstReg.
2524 *
2525 * The register does not need to be shadowing any guest registers.
2526 */
2527DECL_FORCE_INLINE(void)
2528iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2529{
2530 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2531 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2532 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2533 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2534
2535#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2536 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2537 if (fGstRegs)
2538 {
2539 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2540 while (fGstRegs)
2541 {
2542 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2543 fGstRegs &= ~RT_BIT_64(iGstReg);
2544 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2545 }
2546 }
2547#else
2548 RT_NOREF(off);
2549#endif
2550
2551 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2552 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2553 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2554}
2555
2556
2557/**
2558 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2559 * to @a idxRegTo.
2560 */
2561DECL_FORCE_INLINE(void)
2562iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2563 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2564{
2565 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2566 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2567 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2568 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2569 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2570
2571 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2572 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2573 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2574#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2575 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2576 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2577#else
2578 RT_NOREF(off);
2579#endif
2580}
2581
2582
2583/**
2584 * Allocates a temporary host general purpose register for keeping a guest
2585 * register value.
2586 *
2587 * If a host register already holds the guest register value, it will be reused;
2588 * otherwise code will be emitted to do the loading. Code may also
2589 * be emitted if we have to free up a register to satisfy the request.
2590 *
2591 * @returns The host register number; throws VBox status code on failure, so no
2592 * need to check the return value.
2593 * @param pReNative The native recompile state.
2594 * @param poff Pointer to the variable with the code buffer
2595 * position. This will be updated if we need to move a
2596 * variable from register to stack in order to satisfy
2597 * the request.
2598 * @param enmGstReg The guest register that is to be updated.
2599 * @param enmIntendedUse How the caller will be using the host register.
2600 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2601 */
2602DECL_HIDDEN_THROW(uint8_t)
2603iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2604 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2605{
2606 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2607#ifdef LOG_ENABLED
2608 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2609#endif
2610
2611 /*
2612 * First check if the guest register value is already in a host register.
2613 */
2614 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2615 {
2616 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2617 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2618 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2619 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2620
2621 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2622 {
2623 /*
2624 * If the register will trash the guest shadow copy, try find a
2625 * completely unused register we can use instead. If that fails,
2626 * we need to disassociate the host reg from the guest reg.
2627 */
2628 /** @todo would be nice to know if preserving the register is in any way helpful. */
2629 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2630 && ( ~pReNative->Core.bmHstRegs
2631 & ~pReNative->Core.bmHstRegsWithGstShadow
2632 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2633 {
2634 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2635
2636 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2637
2638 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2639 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2640 g_apszIemNativeHstRegNames[idxRegNew]));
2641 idxReg = idxRegNew;
2642 }
2643 else
2644 {
2645 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2646 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2647 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2648 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2649 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2650 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2651 else
2652 {
2653 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2654 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2655 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2656 }
2657 }
2658 }
2659 else
2660 {
2661 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2662 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2663
2664 /*
2665 * Allocate a new register, copy the value and, if updating, the
2666 * guest shadow copy assignment to the new register.
2667 */
2668 /** @todo share register for readonly access. */
2669 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2670
2671 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2672
2673 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2674 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2675 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2676 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2677 else
2678 {
2679 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2680 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2681 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2682 g_apszIemNativeHstRegNames[idxRegNew]));
2683 }
2684 idxReg = idxRegNew;
2685 }
2686
2687#ifdef VBOX_STRICT
2688 /* Strict builds: Check that the value is correct. */
2689 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2690#endif
2691
2692 return idxReg;
2693 }
2694
2695 /*
2696 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register (unless it's a destructive calculation).
2697 */
2698 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2699
2700 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2701
2702 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2703 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2704 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2705 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2706
2707 return idxRegNew;
2708}
2709
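/*
 * Illustrative usage (sketch; the guest register and intended use are just
 * examples):
 *
 *      uint8_t const idxGprReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      ...emit code updating idxGprReg; the shadow association is kept so the
 *      value can be reused later...
 *      iemNativeRegFreeTmp(pReNative, idxGprReg);
 */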
2710
2711/**
2712 * Allocates a temporary host general purpose register that already holds the
2713 * given guest register value.
2714 *
2715 * This function is for use in places where the shadowing state cannot be
2716 * modified due to branching and such. It will fail if we don't have a
2717 * current shadow copy handy or if it's incompatible. The only code that will
2718 * be emitted here is value checking code in strict builds.
2719 *
2720 * The intended use can only be readonly!
2721 *
2722 * @returns The host register number, UINT8_MAX if not present.
2723 * @param pReNative The native recompile state.
2724 * @param poff Pointer to the instruction buffer offset.
2725 * Will be updated in strict builds if a register is
2726 * found.
2727 * @param enmGstReg The guest register whose value is wanted (read-only).
2728 * @note In strict builds, this may throw instruction buffer growth failures.
2729 * Non-strict builds will not throw anything.
2730 * @sa iemNativeRegAllocTmpForGuestReg
2731 */
2732DECL_HIDDEN_THROW(uint8_t)
2733iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2734{
2735 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2736
2737 /*
2738 * First check if the guest register value is already in a host register.
2739 */
2740 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2741 {
2742 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2743 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2744 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2745 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2746
2747 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2748 {
2749 /*
2750 * We only do readonly use here, so easy compared to the other
2751 * variant of this code.
2752 */
2753 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2754 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2755 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2756 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2757 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2758
2759#ifdef VBOX_STRICT
2760 /* Strict builds: Check that the value is correct. */
2761 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2762#else
2763 RT_NOREF(poff);
2764#endif
2765 return idxReg;
2766 }
2767 }
2768
2769 return UINT8_MAX;
2770}
2771
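/*
 * Illustrative usage (sketch): the caller must handle the UINT8_MAX case.
 *
 *      uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
 *                                                                                kIemNativeGstReg_EFlags);
 *      if (idxEflReg != UINT8_MAX)
 *          ...read the flags from idxEflReg, then iemNativeRegFreeTmp(pReNative, idxEflReg)...
 *      else
 *          ...fall back to loading the flags from CPUMCTX...
 */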
2772
2773DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2774
2775
2776/**
2777 * Allocates argument registers for a function call.
2778 *
2779 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2780 * need to check the return value.
2781 * @param pReNative The native recompile state.
2782 * @param off The current code buffer offset.
2783 * @param cArgs The number of arguments the function call takes.
2784 */
2785DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2786{
2787 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2788 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2789 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2790 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2791
2792 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2793 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2794 else if (cArgs == 0)
2795 return off;
2796
2797 /*
2798 * Do we get lucky and all registers are free and not shadowing anything?
2799 */
2800 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2801 for (uint32_t i = 0; i < cArgs; i++)
2802 {
2803 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2804 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2805 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2806 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2807 }
2808 /*
2809 * Okay, not lucky so we have to free up the registers.
2810 */
2811 else
2812 for (uint32_t i = 0; i < cArgs; i++)
2813 {
2814 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2815 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2816 {
2817 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2818 {
2819 case kIemNativeWhat_Var:
2820 {
2821 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2822 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2823 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2824 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2825 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2826
2827 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2828 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2829 else
2830 {
2831 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2832 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2833 }
2834 break;
2835 }
2836
2837 case kIemNativeWhat_Tmp:
2838 case kIemNativeWhat_Arg:
2839 case kIemNativeWhat_rc:
2840 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2841 default:
2842 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2843 }
2844
2845 }
2846 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2847 {
2848 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2849 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2850 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2851 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2852 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2853 }
2854 else
2855 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2856 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2857 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2858 }
2859 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2860 return off;
2861}
2862
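/*
 * Illustrative usage before emitting a helper call (sketch; the second
 * argument value uSomeArg is hypothetical):
 *
 *      off = iemNativeRegAllocArgs(pReNative, off, 2);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uSomeArg);
 *      ...emit the actual call...
 */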
2863
2864DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2865
2866
2867#if 0
2868/**
2869 * Frees a register assignment of any type.
2870 *
2871 * @param pReNative The native recompile state.
2872 * @param idxHstReg The register to free.
2873 *
2874 * @note Does not update variables.
2875 */
2876DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2877{
2878 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2879 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2880 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2881 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2882 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2883 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2884 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2885 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2886 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2887 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2888 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2889 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2890 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2891 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2892
2893 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2894 /* no flushing, right:
2895 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2896 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2897 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2898 */
2899}
2900#endif
2901
2902
2903/**
2904 * Frees a temporary register.
2905 *
2906 * Any shadow copies of guest registers assigned to the host register will not
2907 * be flushed by this operation.
2908 */
2909DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2910{
2911 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2912 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2913 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2914 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2915 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2916}
2917
2918
2919/**
2920 * Frees a temporary immediate register.
2921 *
2922 * It is assumed that the call has not modified the register, so it still holds
2923 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2924 */
2925DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2926{
2927 iemNativeRegFreeTmp(pReNative, idxHstReg);
2928}
2929
2930
2931/**
2932 * Called right before emitting a call instruction to move anything important
2933 * out of call-volatile registers, free and flush the call-volatile registers,
2934 * optionally freeing argument variables.
2935 *
2936 * @returns New code buffer offset, UINT32_MAX on failure.
2937 * @param pReNative The native recompile state.
2938 * @param off The code buffer offset.
2939 * @param cArgs The number of arguments the function call takes.
2940 *                      It is presumed that the host register part of these has
2941 * been allocated as such already and won't need moving,
2942 * just freeing.
2943 */
2944DECL_HIDDEN_THROW(uint32_t)
2945iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2946{
2947 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
2948
2949 /*
2950 * Move anything important out of volatile registers.
2951 */
2952 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2953 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2954 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2955#ifdef IEMNATIVE_REG_FIXED_TMP0
2956 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2957#endif
2958 & ~g_afIemNativeCallRegs[cArgs];
2959
2960 fRegsToMove &= pReNative->Core.bmHstRegs;
2961 if (!fRegsToMove)
2962 { /* likely */ }
2963 else
2964 while (fRegsToMove != 0)
2965 {
2966 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2967 fRegsToMove &= ~RT_BIT_32(idxReg);
2968
2969 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2970 {
2971 case kIemNativeWhat_Var:
2972 {
2973 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2974 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2975 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2976 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2977 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2978 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2979 else
2980 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2981 continue;
2982 }
2983
2984 case kIemNativeWhat_Arg:
2985 AssertMsgFailed(("What?!?: %u\n", idxReg));
2986 continue;
2987
2988 case kIemNativeWhat_rc:
2989 case kIemNativeWhat_Tmp:
2990 AssertMsgFailed(("Missing free: %u\n", idxReg));
2991 continue;
2992
2993 case kIemNativeWhat_FixedTmp:
2994 case kIemNativeWhat_pVCpuFixed:
2995 case kIemNativeWhat_pCtxFixed:
2996 case kIemNativeWhat_FixedReserved:
2997 case kIemNativeWhat_Invalid:
2998 case kIemNativeWhat_End:
2999 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3000 }
3001 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3002 }
3003
3004 /*
3005 * Do the actual freeing.
3006 */
3007 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3008
3009 /* If there are guest register shadows in any call-volatile register, we
3010        have to clear the corresponding guest register masks for each register. */
3011 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3012 if (fHstRegsWithGstShadow)
3013 {
3014 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3015 do
3016 {
3017 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3018            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3019
3020 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3021 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3022 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3023 } while (fHstRegsWithGstShadow != 0);
3024 }
3025
3026 return off;
3027}
3028
3029
3030/**
3031 * Flushes a set of guest register shadow copies.
3032 *
3033 * This is usually done after calling a threaded function or a C-implementation
3034 * of an instruction.
3035 *
3036 * @param pReNative The native recompile state.
3037 * @param fGstRegs Set of guest registers to flush.
3038 */
3039DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3040{
3041 /*
3042 * Reduce the mask by what's currently shadowed
3043 */
3044 fGstRegs &= pReNative->Core.bmGstRegShadows;
3045 if (fGstRegs)
3046 {
3047 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3048 if (pReNative->Core.bmGstRegShadows)
3049 {
3050 /*
3051 * Partial.
3052 */
3053 do
3054 {
3055 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3056 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3057 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3058 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3059 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3060
3061 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3062 fGstRegs &= ~fInThisHstReg;
3063 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3064 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3065 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3066 } while (fGstRegs != 0);
3067 }
3068 else
3069 {
3070 /*
3071 * Clear all.
3072 */
3073 do
3074 {
3075 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3076 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3077 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3078 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3079 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3080
3081 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3082 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3083 } while (fGstRegs != 0);
3084 pReNative->Core.bmHstRegsWithGstShadow = 0;
3085 }
3086 }
3087}
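
/*
 * For reference (not compiled): the shadow-tracking bookkeeping used above can
 * be summarised by the invariants below.  This is a minimal sketch derived from
 * the assertions in this file; the helper name is made up for the illustration.
 */
#if 0
DECLINLINE(void) iemNativeRegAssertShadowInvariantsExample(PIEMRECOMPILERSTATE pReNative)
{
    for (uint8_t idxHstReg = 0; idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs); idxHstReg++)
    {
        uint64_t const fShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
        /* A host register is marked in bmHstRegsWithGstShadow exactly when it shadows something. */
        Assert(RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg)) == RT_BOOL(fShadows));
        /* Whatever a host register claims to shadow must also be set in the global guest mask. */
        Assert((fShadows & pReNative->Core.bmGstRegShadows) == fShadows);
    }

    /* Each shadowed guest register points back at the host register shadowing it. */
    uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
    while (fGstRegs)
    {
        unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
        fGstRegs &= ~RT_BIT_64(idxGstReg);
        uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
        Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    }
}
#endif
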
3088
3089
3090/**
3091 * Flushes any delayed guest register writes.
3092 *
3093 * This must be called prior to calling CImpl functions and any helpers that use
3094 * the guest state (like raising exceptions) and such.
3095 *
3096 * This optimization has not yet been implemented. The first target would be
3097 * RIP updates, since these are the most common ones.
3098 */
3099DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3100{
3101 RT_NOREF(pReNative, off);
3102 return off;
3103}
3104
3105
3106/*********************************************************************************************************************************
3107* Code Emitters (larger snippets) *
3108*********************************************************************************************************************************/
3109
3110/**
3111 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3112 * extending to 64-bit width.
3113 *
3114 * @returns New code buffer offset on success, UINT32_MAX on failure.
3115 * @param   pReNative   The native recompile state.
3116 * @param off The current code buffer position.
3117 * @param idxHstReg The host register to load the guest register value into.
3118 * @param enmGstReg The guest register to load.
3119 *
3120 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3121 * that is something the caller needs to do if applicable.
3122 */
3123DECL_HIDDEN_THROW(uint32_t)
3124iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3125{
3126 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3127 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3128
3129 switch (g_aGstShadowInfo[enmGstReg].cb)
3130 {
3131 case sizeof(uint64_t):
3132 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3133 case sizeof(uint32_t):
3134 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3135 case sizeof(uint16_t):
3136 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3137#if 0 /* not present in the table. */
3138 case sizeof(uint8_t):
3139 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3140#endif
3141 default:
3142 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3143 }
3144}
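
/*
 * Sketch (not compiled) of the bits of a g_aGstShadowInfo entry the switch
 * above relies on.  The layout here is an assumption made for the example;
 * the real table and its type are defined elsewhere in this file and may
 * carry additional fields.
 */
#if 0
typedef struct IEMNATIVEGSTSHADOWINFOEXAMPLE
{
    uint32_t off;   /**< Offset of the guest register member within VMCPU (CPUMCTX lives inside it). */
    uint8_t  cb;    /**< Size of the member in bytes: 8, 4 or 2 for the cases handled above. */
} IEMNATIVEGSTSHADOWINFOEXAMPLE;
#endif
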
3145
3146
3147#ifdef VBOX_STRICT
3148/**
3149 * Emits code that checks that the content of register @a idxReg is the same
3150 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
3151 * instruction if that's not the case.
3152 *
3153 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3154 * Trashes EFLAGS on AMD64.
3155 */
3156static uint32_t
3157iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3158{
3159# ifdef RT_ARCH_AMD64
3160 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3161
3162 /* cmp reg, [mem] */
3163 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3164 {
3165 if (idxReg >= 8)
3166 pbCodeBuf[off++] = X86_OP_REX_R;
3167 pbCodeBuf[off++] = 0x38;
3168 }
3169 else
3170 {
3171 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3172 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3173 else
3174 {
3175 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3176 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3177 else
3178 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3179 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3180 if (idxReg >= 8)
3181 pbCodeBuf[off++] = X86_OP_REX_R;
3182 }
3183 pbCodeBuf[off++] = 0x39;
3184 }
3185 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3186
3187 /* je/jz +1 */
3188 pbCodeBuf[off++] = 0x74;
3189 pbCodeBuf[off++] = 0x01;
3190
3191 /* int3 */
3192 pbCodeBuf[off++] = 0xcc;
3193
3194 /* For values smaller than the register size, we must check that the rest
3195 of the register is all zeros. */
3196 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3197 {
3198 /* test reg64, imm32 */
3199 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3200 pbCodeBuf[off++] = 0xf7;
3201 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3202 pbCodeBuf[off++] = 0;
3203 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3204 pbCodeBuf[off++] = 0xff;
3205 pbCodeBuf[off++] = 0xff;
3206
3207 /* je/jz +1 */
3208 pbCodeBuf[off++] = 0x74;
3209 pbCodeBuf[off++] = 0x01;
3210
3211 /* int3 */
3212 pbCodeBuf[off++] = 0xcc;
3213 }
3214 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3215 {
3216 /* rol reg64, 32 */
3217 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3218 pbCodeBuf[off++] = 0xc1;
3219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3220 pbCodeBuf[off++] = 32;
3221
3222 /* test reg32, ffffffffh */
3223 if (idxReg >= 8)
3224 pbCodeBuf[off++] = X86_OP_REX_B;
3225 pbCodeBuf[off++] = 0xf7;
3226 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3227 pbCodeBuf[off++] = 0xff;
3228 pbCodeBuf[off++] = 0xff;
3229 pbCodeBuf[off++] = 0xff;
3230 pbCodeBuf[off++] = 0xff;
3231
3232 /* je/jz +1 */
3233 pbCodeBuf[off++] = 0x74;
3234 pbCodeBuf[off++] = 0x01;
3235
3236 /* int3 */
3237 pbCodeBuf[off++] = 0xcc;
3238
3239 /* rol reg64, 32 */
3240 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3241 pbCodeBuf[off++] = 0xc1;
3242 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3243 pbCodeBuf[off++] = 32;
3244 }
3245
3246# elif defined(RT_ARCH_ARM64)
3247 /* mov TMP0, [gstreg] */
3248 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3249
3250 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3251 /* sub tmp0, tmp0, idxReg */
3252 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3253 /* cbz tmp0, +1 */
3254 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3255 /* brk #0x1000+enmGstReg */
3256 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3257
3258# else
3259# error "Port me!"
3260# endif
3261 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3262 return off;
3263}
3264#endif /* VBOX_STRICT */
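
/*
 * Sketch (not compiled): written as plain C, the strict check emitted above
 * amounts to the following for a 32-bit wide guest register (the 8/16-bit
 * cases instead test that the bits above the register width are zero).  The
 * helper name is made up for the illustration.
 */
#if 0
DECLINLINE(void) iemNativeDbgCheckGstRegValueExample(PVMCPUCC pVCpu, uint64_t uHstValue, IEMNATIVEGSTREG enmGstReg)
{
    Assert(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t)); /* only the 32-bit case in this sketch */
    uint32_t const uGstValue = *(uint32_t const *)((uintptr_t)pVCpu + g_aGstShadowInfo[enmGstReg].off);
    if ((uint32_t)uHstValue != uGstValue)
        ASMBreakpoint();    /* int3 / brk: the shadow copy is out of sync with CPUMCTX. */
    if (uHstValue >> 32)
        ASMBreakpoint();    /* shadow copies are zero extended, so the upper half must be zero. */
}
#endif
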
3265
3266
3267
3268/**
3269 * Emits code for checking the return code of a call and rcPassUp, returning
3270 * from the code if either is non-zero.
3271 */
3272DECL_HIDDEN_THROW(uint32_t)
3273iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3274{
3275#ifdef RT_ARCH_AMD64
3276 /*
3277 * AMD64: eax = call status code.
3278 */
3279
3280 /* edx = rcPassUp */
3281 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3282# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3283 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3284# endif
3285
3286 /* edx = eax | rcPassUp */
3287 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3288 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3289 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3290 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3291
3292 /* Jump to non-zero status return path. */
3293 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3294
3295 /* done. */
3296
3297#elif RT_ARCH_ARM64
3298 /*
3299 * ARM64: w0 = call status code.
3300 */
3301 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3302 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3303
3304 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3305
3306 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3307
3308 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3309 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3310 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3311
3312#else
3313# error "port me"
3314#endif
3315 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3316 return off;
3317}
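
/*
 * Sketch (not compiled): in C terms the check emitted above decides whether to
 * branch to the NonZeroRetOrPassUp label (the RC fiddling code emitted by
 * iemNativeEmitRcFiddling) like so.  The helper name is made up here.
 */
#if 0
DECLINLINE(bool) iemNativeMustFiddleStatusExample(PVMCPUCC pVCpu, int32_t rcCall)
{
    /* Take the slow path when either the call status or the pending pass-up status is non-zero. */
    return (rcCall | pVCpu->iem.s.rcPassUp) != 0;
}
#endif
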
3318
3319
3320/**
3321 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3322 * raising a \#GP(0) if it isn't.
3323 *
3324 * @returns New code buffer offset, UINT32_MAX on failure.
3325 * @param pReNative The native recompile state.
3326 * @param off The code buffer offset.
3327 * @param idxAddrReg The host register with the address to check.
3328 * @param idxInstr The current instruction.
3329 */
3330DECL_HIDDEN_THROW(uint32_t)
3331iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3332{
3333 RT_NOREF(idxInstr);
3334
3335 /*
3336 * Make sure we don't have any outstanding guest register writes as we may
3337     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3338 */
3339 off = iemNativeRegFlushPendingWrites(pReNative, off);
3340
3341#ifdef RT_ARCH_AMD64
3342 /*
3343 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3344 * return raisexcpt();
3345     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3346 */
3347 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3348
3349 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3350 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3351 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3352 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3353
3354# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3355 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3356# else
3357 uint32_t const offFixup = off;
3358 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3359 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3360 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3361 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3362# endif
3363
3364 iemNativeRegFreeTmp(pReNative, iTmpReg);
3365
3366#elif defined(RT_ARCH_ARM64)
3367 /*
3368 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3369 * return raisexcpt();
3370 * ----
3371 * mov x1, 0x800000000000
3372 * add x1, x0, x1
3373 * cmp xzr, x1, lsr 48
3374 * and either:
3375 * b.ne .Lraisexcpt
3376 * or:
3377 * b.eq .Lnoexcept
3378 * movz x1, #instruction-number
3379 * b .Lraisexcpt
3380 * .Lnoexcept:
3381 */
3382 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3383
3384 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3385 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3386    off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3387
3388# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3389 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3390# else
3391 uint32_t const offFixup = off;
3392 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3393 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3394 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3395 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3396# endif
3397
3398 iemNativeRegFreeTmp(pReNative, iTmpReg);
3399
3400#else
3401# error "Port me"
3402#endif
3403 return off;
3404}
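
/*
 * A couple of worked examples for the AMD64 variant above.  A 48-bit canonical
 * address has bits 63:47 all equal, i.e. its high dword lies in
 * [0x00000000..0x00007fff] or [0xffff8000..0xffffffff]:
 *      0x00007fffffffffff: high dword 0x00007fff, + 0x8000 = 0x0000ffff, >> 16 = 0         -> canonical
 *      0xffff800000000000: high dword 0xffff8000, + 0x8000 wraps to 0x00000000, >> 16 = 0  -> canonical
 *      0x0000800000000000: high dword 0x00008000, + 0x8000 = 0x00010000, >> 16 = 1         -> #GP(0)
 *      0xffff7fffffffffff: high dword 0xffff7fff, + 0x8000 = 0xffffffff, >> 16 = 0xffff    -> #GP(0)
 * The ARM64 variant does the same in 64-bit arithmetic: (uAddr + 0x800000000000) >> 48 must be zero.
 */
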
3405
3406
3407/**
3408 * Emits code to check if the content of @a idxAddrReg is within the limit of
3409 * idxSegReg, raising a \#GP(0) if it isn't.
3410 *
3411 * @returns New code buffer offset; throws VBox status code on error.
3412 * @param pReNative The native recompile state.
3413 * @param off The code buffer offset.
3414 * @param idxAddrReg The host register (32-bit) with the address to
3415 * check.
3416 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3417 * against.
3418 * @param idxInstr The current instruction.
3419 */
3420DECL_HIDDEN_THROW(uint32_t)
3421iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3422 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3423{
3424 /*
3425 * Make sure we don't have any outstanding guest register writes as we may
3426     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3427 */
3428 off = iemNativeRegFlushPendingWrites(pReNative, off);
3429
3430 /** @todo implement expand down/whatnot checking */
3431 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3432
3433 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3434 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3435 kIemNativeGstRegUse_ForUpdate);
3436
3437 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3438
3439#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3440 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3441 RT_NOREF(idxInstr);
3442#else
3443 uint32_t const offFixup = off;
3444 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3445 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3446 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3447 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3448#endif
3449
3450 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3451 return off;
3452}
3453
3454
3455/**
3456 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
3457 *
3458 * @returns The flush mask.
3459 * @param fCImpl The IEM_CIMPL_F_XXX flags.
3460 * @param fGstShwFlush The starting flush mask.
3461 */
3462DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
3463{
3464 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
3465 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
3466 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
3467 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
3468 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
3469 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
3470 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
3471 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
3472 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
3473 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
3474 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
3475 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
3476 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3477 return fGstShwFlush;
3478}
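
/*
 * Example (not compiled): a far branch that also switches stacks ends up
 * flushing the CS and SS shadows plus rSP, i.e. the call below yields the
 * OR of the seven bits listed in the comment.
 */
#if 0
uint64_t const g_fGstShwFlushFarBranchExample =
    iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_BRANCH_STACK_FAR, 0);
/*  ==   RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS) | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
       | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS) | RT_BIT_64(kIemNativeGstReg_GprFirst     + X86_GREG_xSP)
       | RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_SS) | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
       | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS) */
#endif
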
3479
3480
3481/**
3482 * Emits a call to a CImpl function or something similar.
3483 */
3484static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
3485 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3486 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3487{
3488 /*
3489     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
3490 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
3491 */
3492 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
3493 fGstShwFlush
3494 | RT_BIT_64(kIemNativeGstReg_Pc)
3495 | RT_BIT_64(kIemNativeGstReg_EFlags));
3496 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3497
3498 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3499
3500 /*
3501 * Load the parameters.
3502 */
3503#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3504    /* Special-case the hidden VBOXSTRICTRC return pointer. */
3505 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3506 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3507 if (cAddParams > 0)
3508 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3509 if (cAddParams > 1)
3510 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3511 if (cAddParams > 2)
3512 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3513 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3514
3515#else
3516 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3517 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3518 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3519 if (cAddParams > 0)
3520 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3521 if (cAddParams > 1)
3522 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3523 if (cAddParams > 2)
3524# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3525 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3526# else
3527 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3528# endif
3529#endif
3530
3531 /*
3532 * Make the call.
3533 */
3534 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3535
3536#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3537 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3538#endif
3539
3540 /*
3541 * Check the status code.
3542 */
3543 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3544}
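
/*
 * Sketch (not compiled): viewed from C, the sequence emitted above for, say,
 * cAddParams = 2 effectively performs the call below and then runs the result
 * through iemNativeEmitCheckCallRetAndPassUp.  The function pointer typedef is
 * made up here purely to show the argument order; the Windows/VBOXSTRICTRC
 * hidden-return-pointer convention handled above is glossed over.
 */
#if 0
typedef VBOXSTRICTRC (FNIEMCIMPLEXAMPLE)(PVMCPUCC pVCpu, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1);

DECLINLINE(VBOXSTRICTRC) iemNativeCImplCallExample(PVMCPUCC pVCpu, uintptr_t pfnCImpl, uint8_t cbInstr,
                                                   uint64_t uParam0, uint64_t uParam1)
{
    return ((FNIEMCIMPLEXAMPLE *)pfnCImpl)(pVCpu, cbInstr, uParam0, uParam1);
}
#endif
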
3545
3546
3547/**
3548 * Emits a call to a threaded worker function.
3549 */
3550static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3551{
3552 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3553 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3554 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3555
3556#ifdef RT_ARCH_AMD64
3557 /* Load the parameters and emit the call. */
3558# ifdef RT_OS_WINDOWS
3559# ifndef VBOXSTRICTRC_STRICT_ENABLED
3560 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3561 if (cParams > 0)
3562 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3563 if (cParams > 1)
3564 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3565 if (cParams > 2)
3566 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3567# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3568 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3569 if (cParams > 0)
3570 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3571 if (cParams > 1)
3572 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3573 if (cParams > 2)
3574 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3575 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3576 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3577# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3578# else
3579 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3580 if (cParams > 0)
3581 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3582 if (cParams > 1)
3583 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3584 if (cParams > 2)
3585 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3586# endif
3587
3588 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3589
3590# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3591 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3592# endif
3593
3594#elif RT_ARCH_ARM64
3595 /*
3596 * ARM64:
3597 */
3598 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3599 if (cParams > 0)
3600 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3601 if (cParams > 1)
3602 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3603 if (cParams > 2)
3604 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3605
3606 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3607
3608#else
3609# error "port me"
3610#endif
3611
3612 /*
3613 * Check the status code.
3614 */
3615 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3616
3617 return off;
3618}
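
/*
 * In C terms the call emitted above roughly amounts to
 *      rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu, pCallEntry->auParams[0],
 *                                                                     pCallEntry->auParams[1], pCallEntry->auParams[2]);
 * except that only the first g_acIemThreadedFunctionUsedArgs[enmFunction]
 * parameters are actually loaded, and the result is then run through the same
 * non-zero status / rcPassUp check as the CImpl calls above.
 */
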
3619
3620
3621/**
3622 * Emits the code at the RaiseGP0 label.
3623 */
3624static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3625{
3626 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3627 if (idxLabel != UINT32_MAX)
3628 {
3629 iemNativeLabelDefine(pReNative, idxLabel, off);
3630
3631 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3632 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3633#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3634 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3635#endif
3636 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3637
3638 /* jump back to the return sequence. */
3639 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3640 }
3641 return off;
3642}
3643
3644
3645/**
3646 * Emits the code at the ReturnWithFlags label (returns
3647 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3648 */
3649static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3650{
3651 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3652 if (idxLabel != UINT32_MAX)
3653 {
3654 iemNativeLabelDefine(pReNative, idxLabel, off);
3655
3656 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3657
3658 /* jump back to the return sequence. */
3659 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3660 }
3661 return off;
3662}
3663
3664
3665/**
3666 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3667 */
3668static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3669{
3670 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3671 if (idxLabel != UINT32_MAX)
3672 {
3673 iemNativeLabelDefine(pReNative, idxLabel, off);
3674
3675 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3676
3677 /* jump back to the return sequence. */
3678 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3679 }
3680 return off;
3681}
3682
3683
3684/**
3685 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3686 */
3687static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3688{
3689 /*
3690 * Generate the rc + rcPassUp fiddling code if needed.
3691 */
3692 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3693 if (idxLabel != UINT32_MAX)
3694 {
3695 iemNativeLabelDefine(pReNative, idxLabel, off);
3696
3697 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3698#ifdef RT_ARCH_AMD64
3699# ifdef RT_OS_WINDOWS
3700# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3701 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3702# endif
3703 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3704 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3705# else
3706 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3707 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3708# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3709 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3710# endif
3711# endif
3712# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3713 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3714# endif
3715
3716#else
3717 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3718 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3719 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3720#endif
3721
3722 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3723 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3724 }
3725 return off;
3726}
3727
3728
3729/**
3730 * Emits a standard epilog.
3731 */
3732static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3733{
3734 *pidxReturnLabel = UINT32_MAX;
3735
3736 /*
3737 * Successful return, so clear the return register (eax, w0).
3738 */
3739    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3740
3741 /*
3742 * Define label for common return point.
3743 */
3744 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3745 *pidxReturnLabel = idxReturn;
3746
3747 /*
3748 * Restore registers and return.
3749 */
3750#ifdef RT_ARCH_AMD64
3751 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3752
3753    /* Reposition rsp at the r15 restore point. */
3754 pbCodeBuf[off++] = X86_OP_REX_W;
3755 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3756 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3757 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3758
3759 /* Pop non-volatile registers and return */
3760 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3761 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3762 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3763 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3764 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3765 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3766 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3767 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3768# ifdef RT_OS_WINDOWS
3769 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3770 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3771# endif
3772 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3773 pbCodeBuf[off++] = 0xc9; /* leave */
3774 pbCodeBuf[off++] = 0xc3; /* ret */
3775 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3776
3777#elif RT_ARCH_ARM64
3778 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3779
3780    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
3781 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3782 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3783 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3784 IEMNATIVE_FRAME_VAR_SIZE / 8);
3785 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3786 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3787 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3788 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3789 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3790 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3791 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3792 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3793 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3794 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3795 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3796 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3797
3798 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3799 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3800 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3801 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3802
3803 /* retab / ret */
3804# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3805 if (1)
3806 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3807 else
3808# endif
3809 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3810
3811#else
3812# error "port me"
3813#endif
3814 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3815
3816 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3817}
3818
3819
3820/**
3821 * Emits a standard prolog.
3822 */
3823static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3824{
3825#ifdef RT_ARCH_AMD64
3826 /*
3827 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3828 * reserving 64 bytes for stack variables plus 4 non-register argument
3829     * slots.  Fixed register assignment: xBX = pVCpu;
3830 *
3831 * Since we always do the same register spilling, we can use the same
3832 * unwind description for all the code.
3833 */
3834 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3835 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3836 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3837 pbCodeBuf[off++] = 0x8b;
3838 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3839 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3840 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3841# ifdef RT_OS_WINDOWS
3842 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3843 pbCodeBuf[off++] = 0x8b;
3844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3845 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3846 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3847# else
3848 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3849 pbCodeBuf[off++] = 0x8b;
3850 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3851# endif
3852 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3853 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3854 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3855 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3856 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3857 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3858 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3859 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3860
3861 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3862 X86_GREG_xSP,
3863 IEMNATIVE_FRAME_ALIGN_SIZE
3864 + IEMNATIVE_FRAME_VAR_SIZE
3865 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3866 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3867 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3868 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3869 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3870
3871#elif RT_ARCH_ARM64
3872 /*
3873 * We set up a stack frame exactly like on x86, only we have to push the
3874     * return address ourselves here.  We save all non-volatile registers.
3875 */
3876 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3877
3878 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
3879                       * unable to figure out where the BRK following the AUTHB*+XPACB* stuff in libunwind comes from. It's
3880                       * definitely the dwarf stepping code, but so far it has been very tedious to figure out whether it's
3881                       * in any way conditional, so just emit this instruction now and hope for the best... */
3882 /* pacibsp */
3883 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3884# endif
3885
3886 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3887 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3888 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3889 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3890 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3891 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3892 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3893 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3894 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3895 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3896 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3897 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3898 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3899 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3900 /* Save the BP and LR (ret address) registers at the top of the frame. */
3901 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3902 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3903 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3904 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3905 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3906 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3907
3908 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3909 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3910
3911 /* mov r28, r0 */
3912 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3913 /* mov r27, r1 */
3914 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3915
3916#else
3917# error "port me"
3918#endif
3919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3920 return off;
3921}
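
/*
 * For reference, the AMD64 frame built above looks like this relative to RBP
 * (the ARM64 prolog does the equivalent with the stp pairs):
 *      rbp+08h: return address
 *      rbp+00h: saved rbp
 *      rbp-08h: saved rbx
 *      rbp-10h: saved rsi (Windows only)
 *      rbp-18h: saved rdi (Windows only)
 *      then r12, r13, r14 and r15 in that order, so r15 ends up at rbp-28h on
 *      SysV hosts and rbp-38h on Windows (matching the 'lea rsp' in the epilog),
 *      followed by the alignment padding, the variable area and the stack/shadow
 *      argument slots allocated by the 'sub rsp' above.
 */
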
3922
3923
3924
3925
3926/*********************************************************************************************************************************
3927* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
3928*********************************************************************************************************************************/
3929
3930#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3931 { \
3932 pReNative->fMc = (a_fMcFlags); \
3933 pReNative->fCImpl = (a_fCImplFlags); \
3934 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
3935
3936/** We have to get to the end in recompilation mode, as otherwise we won't
3937 * generate code for all the IEM_MC_IF_XXX branches. */
3938#define IEM_MC_END() \
3939 } return off
3940
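/*
 * Sketch (not compiled): a recompiler function body built from these macros
 * ends up with roughly the shape below.  The function name and its parameter
 * names are assumptions made for the illustration.
 */
#if 0
static uint32_t iemNativeReCompFunc_ExampleInstr(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
{
    RT_NOREF(pCallEntry);
    IEM_MC_BEGIN(0, 0, 0, 0);       /* opens a block and records the MC/CImpl flags + argument count */
    /* ... IEM_MC_XXX statements emitting code and updating 'off' go here ... */
    IEM_MC_END();                   /* closes the block and returns the new code buffer offset */
}
#endif
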
3941
3942
3943/*********************************************************************************************************************************
3944* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
3945*********************************************************************************************************************************/
3946
3947#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
3948 pReNative->fMc = 0; \
3949 pReNative->fCImpl = (a_fFlags); \
3950 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3951
3952
3953#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3954 pReNative->fMc = 0; \
3955 pReNative->fCImpl = (a_fFlags); \
3956 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3957
3958DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3959 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3960 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3961{
3962 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3963}
3964
3965
3966#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3967 pReNative->fMc = 0; \
3968 pReNative->fCImpl = (a_fFlags); \
3969 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
3970 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3971
3972DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3973 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3974 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3975{
3976 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3977}
3978
3979
3980#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3981 pReNative->fMc = 0; \
3982 pReNative->fCImpl = (a_fFlags); \
3983 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
3984 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3985
3986DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3987 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3988 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
3989 uint64_t uArg2)
3990{
3991 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3992}
3993
3994
3995
3996/*********************************************************************************************************************************
3997* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
3998*********************************************************************************************************************************/
3999
4000/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
4001 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
4002DECL_INLINE_THROW(uint32_t)
4003iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4004{
4005 /*
4006     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
4007     * return with a special status code and make the execution loop deal with
4008     * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
4009     * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
4010     * could continue w/o interruption, it probably will drop into the
4011     * debugger, so it's not worth the effort of trying to service it here; we
4012     * just lump it in with the handling of the others.
4013     *
4014     * To simplify the code and the register state management even more (wrt
4015     * the immediate in the AND operation), we always update the flags and skip
4016     * the extra check and its associated conditional jump.
4017 */
4018 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
4019 <= UINT32_MAX);
4020 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4021 kIemNativeGstRegUse_ForUpdate);
4022 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
4023 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
4024 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
4025 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
4026 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4027
4028 /* Free but don't flush the EFLAGS register. */
4029 iemNativeRegFreeTmp(pReNative, idxEflReg);
4030
4031 return off;
4032}
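
/*
 * Sketch (not compiled): the code emitted above corresponds roughly to the
 * following C logic (compare iemRegFinishClearingRF); the helper name is made
 * up for the illustration.
 */
#if 0
DECLINLINE(bool) iemNativeFinishFlagsCheckExample(PVMCPUCC pVCpu)
{
    uint32_t fEFlags = pVCpu->cpum.GstCtx.eflags.u;
    if (fEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        return false;   /* branch to the ReturnWithFlags exit, i.e. VINF_IEM_REEXEC_FINISH_WITH_FLAGS */
    fEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
    pVCpu->cpum.GstCtx.eflags.u = fEFlags;
    return true;        /* continue with the next instruction in the TB */
}
#endif
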
4033
4034
4035#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4036 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4037
4038#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4039 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4040 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4041
4042/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4043DECL_INLINE_THROW(uint32_t)
4044iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4045{
4046 /* Allocate a temporary PC register. */
4047 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4048
4049 /* Perform the addition and store the result. */
4050 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4051 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4052
4053 /* Free but don't flush the PC register. */
4054 iemNativeRegFreeTmp(pReNative, idxPcReg);
4055
4056 return off;
4057}
4058
4059
4060#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4061 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4062
4063#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4064 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4065 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4066
4067/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4068DECL_INLINE_THROW(uint32_t)
4069iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4070{
4071 /* Allocate a temporary PC register. */
4072 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4073
4074 /* Perform the addition and store the result. */
4075 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4076 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4077
4078 /* Free but don't flush the PC register. */
4079 iemNativeRegFreeTmp(pReNative, idxPcReg);
4080
4081 return off;
4082}
4083
4084
4085#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4086 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4087
4088#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4089 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4090 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4091
4092/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4093DECL_INLINE_THROW(uint32_t)
4094iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4095{
4096 /* Allocate a temporary PC register. */
4097 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4098
4099 /* Perform the addition and store the result. */
4100 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4101 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4102 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4103
4104 /* Free but don't flush the PC register. */
4105 iemNativeRegFreeTmp(pReNative, idxPcReg);
4106
4107 return off;
4108}
4109
4110
4111
4112/*********************************************************************************************************************************
4113* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4114*********************************************************************************************************************************/
4115
4116#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4117 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4118 (a_enmEffOpSize), pCallEntry->idxInstr)
4119
4120#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4121 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4122 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4123
4124#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4125 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4126 IEMMODE_16BIT, pCallEntry->idxInstr)
4127
4128#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4129 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4130 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4131
4132#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4133 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4134 IEMMODE_64BIT, pCallEntry->idxInstr)
4135
4136#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4137 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4138 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4139
4140/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4141 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4142 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4143DECL_INLINE_THROW(uint32_t)
4144iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4145 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4146{
4147 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4148
4149 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4150 off = iemNativeRegFlushPendingWrites(pReNative, off);
4151
4152 /* Allocate a temporary PC register. */
4153 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4154
4155 /* Perform the addition. */
4156 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4157
4158 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4159 {
4160 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4161 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4162 }
4163 else
4164 {
4165 /* Just truncate the result to 16-bit IP. */
4166 Assert(enmEffOpSize == IEMMODE_16BIT);
4167 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4168 }
4169 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4170
4171 /* Free but don't flush the PC register. */
4172 iemNativeRegFreeTmp(pReNative, idxPcReg);
4173
4174 return off;
4175}
4176
4177
4178#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4179 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4180 (a_enmEffOpSize), pCallEntry->idxInstr)
4181
4182#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4183 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4184 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4185
4186#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4187 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4188 IEMMODE_16BIT, pCallEntry->idxInstr)
4189
4190#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4191 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4192 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4193
4194#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4195 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4196 IEMMODE_32BIT, pCallEntry->idxInstr)
4197
4198#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4199 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4200 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4201
4202/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4203 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4204 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4205DECL_INLINE_THROW(uint32_t)
4206iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4207 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4208{
4209 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4210
4211 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4212 off = iemNativeRegFlushPendingWrites(pReNative, off);
4213
4214 /* Allocate a temporary PC register. */
4215 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4216
4217 /* Perform the addition. */
4218 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4219
4220 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4221 if (enmEffOpSize == IEMMODE_16BIT)
4222 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4223
4224 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4225 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4226
4227 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4228
4229 /* Free but don't flush the PC register. */
4230 iemNativeRegFreeTmp(pReNative, idxPcReg);
4231
4232 return off;
4233}
4234
4235
4236#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4237 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4238
4239#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4240 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4241 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4242
4243#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4244 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4245
4246#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4247 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4248 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4249
4250#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4251 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4252
4253#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4254 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4256
4257/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4258DECL_INLINE_THROW(uint32_t)
4259iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4260 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4261{
4262 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4263 off = iemNativeRegFlushPendingWrites(pReNative, off);
4264
4265 /* Allocate a temporary PC register. */
4266 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4267
4268 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4269 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4270 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4271 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4272 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4273
4274 /* Free but don't flush the PC register. */
4275 iemNativeRegFreeTmp(pReNative, idxPcReg);
4276
4277 return off;
4278}
4279
4280
4281
4282/*********************************************************************************************************************************
4283* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4284*********************************************************************************************************************************/
4285
4286/**
4287 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4288 *
4289 * @returns Pointer to the condition stack entry.
4290 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED when nested too deeply.
4291 */
4292DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4293{
4294 uint32_t const idxStack = pReNative->cCondDepth;
4295 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4296
4297 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4298 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4299
4300 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4301 pEntry->fInElse = false;
4302 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4303 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4304
4305 return pEntry;
4306}
4307
4308
4309/**
4310 * Start of the if-block, snapshotting the register and variable state.
4311 */
4312DECL_INLINE_THROW(void)
4313iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4314{
4315 Assert(offIfBlock != UINT32_MAX);
4316 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4317 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4318 Assert(!pEntry->fInElse);
4319
4320 /* Define the start of the IF block if requested or for disassembly purposes. */
4321 if (idxLabelIf != UINT32_MAX)
4322 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4323#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4324 else
4325 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4326#else
4327 RT_NOREF(offIfBlock);
4328#endif
4329
4330 /* Copy the initial state so we can restore it in the 'else' block. */
4331 pEntry->InitialState = pReNative->Core;
4332}
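/*
 * Rough illustration of how the condition stack is used (hypothetical MC block;
 * the generated native code depends on the host and the register allocator state):
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) { ... } IEM_MC_ELSE() { ... } IEM_MC_ENDIF();
 * roughly becomes:
 *      test ZF in the EFLAGS shadow register, jump to l_else if clear
 *      ... if-block ...
 *      jmp l_endif
 *  l_else:     ; register/variable state restored to the snapshot taken above
 *      ... else-block ...
 *  l_endif:    ; the two states are reconciled, differing shadows/registers dropped
 */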
4333
4334
4335#define IEM_MC_ELSE() } while (0); \
4336 off = iemNativeEmitElse(pReNative, off); \
4337 do {
4338
4339/** Emits code related to IEM_MC_ELSE. */
4340DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4341{
4342 /* Check sanity and get the conditional stack entry. */
4343 Assert(off != UINT32_MAX);
4344 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4345 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4346 Assert(!pEntry->fInElse);
4347
4348 /* Jump to the endif */
4349 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4350
4351 /* Define the else label and enter the else part of the condition. */
4352 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4353 pEntry->fInElse = true;
4354
4355 /* Snapshot the core state so we can do a merge at the endif and restore
4356 the snapshot we took at the start of the if-block. */
4357 pEntry->IfFinalState = pReNative->Core;
4358 pReNative->Core = pEntry->InitialState;
4359
4360 return off;
4361}
4362
4363
4364#define IEM_MC_ENDIF() } while (0); \
4365 off = iemNativeEmitEndIf(pReNative, off)
4366
4367/** Emits code related to IEM_MC_ENDIF. */
4368DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4369{
4370 /* Check sanity and get the conditional stack entry. */
4371 Assert(off != UINT32_MAX);
4372 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4373 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4374
4375 /*
4376 * Now we must find the common ground between the current core state and the
4377 * state at the end of the if-block (or the initial state when there is no
4378 * else-block): use the smallest common denominator and drop anything that differs.
4379 */
4380 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4381 * which is why we're doing this at the end of the else-block.
4382 * But we'd need more info about the future for that to be worth the effort. */
4383 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4384 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4385 {
4386 /* Shadowed guest registers first. */
4387 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4388 if (fGstRegs)
4389 {
4390 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4391 do
4392 {
4393 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4394 fGstRegs &= ~RT_BIT_64(idxGstReg);
4395
4396 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4397 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4398 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4399 {
4400 Log12(("iemNativeEmitEndIf: dropping gst %#RX64 from hst %s\n",
4401 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4402 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4403 }
4404 } while (fGstRegs);
4405 }
4406 else
4407 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4408
4409 /* Check variables next. For now we require them to be identical
4410 or something we can recreate. */
4411 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4412 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4413 if (fVars)
4414 {
4415 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4416 do
4417 {
4418 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4419 fVars &= ~RT_BIT_32(idxVar);
4420
4421 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4422 {
4423 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4424 continue;
4425 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4426 {
4427 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4428 if (idxHstReg != UINT8_MAX)
4429 {
4430 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4431 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4432 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4433 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4434 }
4435 continue;
4436 }
4437 }
4438 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4439 continue;
4440
4441 /* Irreconcilable, so drop it. */
4442 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4443 if (idxHstReg != UINT8_MAX)
4444 {
4445 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4446 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4447 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4448 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4449 }
4450 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4451 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4452 } while (fVars);
4453 }
4454
4455 /* Finally, check that the host register allocations matches. */
4456 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4457 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4458 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4459 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4460 }
4461
4462 /*
4463 * Define the endif label and maybe the else one if we're still in the 'if' part.
4464 */
4465 if (!pEntry->fInElse)
4466 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4467 else
4468 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4469 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4470
4471 /* Pop the conditional stack.*/
4472 pReNative->cCondDepth -= 1;
4473
4474 return off;
4475}
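/*
 * Illustrative example of the reconciliation above: if only the if-block ended up
 * with, say, guest RAX shadowed in a host register, that shadow is dropped at the
 * endif since the else path doesn't have it.  Variables whose register assignment
 * differs between the two branches either lose the host register (non-stack kinds)
 * or are dropped altogether; the host register allocation bitmaps must match in
 * the end or we fail with VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED.
 */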
4476
4477
4478#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4479 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4480 do {
4481
4482/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4483DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4484{
4485 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4486
4487 /* Get the eflags. */
4488 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4489 kIemNativeGstRegUse_ReadOnly);
4490
4491 /* Test and jump. */
4492 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4493
4494 /* Free but don't flush the EFlags register. */
4495 iemNativeRegFreeTmp(pReNative, idxEflReg);
4496
4497 /* Make a copy of the core state now as we start the if-block. */
4498 iemNativeCondStartIfBlock(pReNative, off);
4499
4500 return off;
4501}
4502
4503
4504#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4505 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4506 do {
4507
4508/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4509DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4510{
4511 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4512
4513 /* Get the eflags. */
4514 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4515 kIemNativeGstRegUse_ReadOnly);
4516
4517 /* Test and jump. */
4518 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4519
4520 /* Free but don't flush the EFlags register. */
4521 iemNativeRegFreeTmp(pReNative, idxEflReg);
4522
4523 /* Make a copy of the core state now as we start the if-block. */
4524 iemNativeCondStartIfBlock(pReNative, off);
4525
4526 return off;
4527}
4528
4529
4530#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4531 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4532 do {
4533
4534/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4535DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4536{
4537 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4538
4539 /* Get the eflags. */
4540 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4541 kIemNativeGstRegUse_ReadOnly);
4542
4543 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4544 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4545
4546 /* Test and jump. */
4547 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4548
4549 /* Free but don't flush the EFlags register. */
4550 iemNativeRegFreeTmp(pReNative, idxEflReg);
4551
4552 /* Make a copy of the core state now as we start the if-block. */
4553 iemNativeCondStartIfBlock(pReNative, off);
4554
4555 return off;
4556}
4557
4558
4559#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4560 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4561 do {
4562
4563/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4564DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4565{
4566 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4567
4568 /* Get the eflags. */
4569 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4570 kIemNativeGstRegUse_ReadOnly);
4571
4572 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4573 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4574
4575 /* Test and jump. */
4576 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4577
4578 /* Free but don't flush the EFlags register. */
4579 iemNativeRegFreeTmp(pReNative, idxEflReg);
4580
4581 /* Make a copy of the core state now as we start the if-block. */
4582 iemNativeCondStartIfBlock(pReNative, off);
4583
4584 return off;
4585}
4586
4587
4588#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4589 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4590 do {
4591
4592#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4593 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4594 do {
4595
4596/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4597DECL_INLINE_THROW(uint32_t)
4598iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4599 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4600{
4601 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4602
4603 /* Get the eflags. */
4604 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4605 kIemNativeGstRegUse_ReadOnly);
4606
4607 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4608 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4609
4610 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4611 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4612 Assert(iBitNo1 != iBitNo2);
4613
4614#ifdef RT_ARCH_AMD64
4615 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4616
4617 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4618 if (iBitNo1 > iBitNo2)
4619 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4620 else
4621 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4622 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4623
4624#elif defined(RT_ARCH_ARM64)
4625 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4626 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4627
4628 /* and tmpreg, eflreg, #1<<iBitNo1 */
4629 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4630
4631 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4632 if (iBitNo1 > iBitNo2)
4633 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4634 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4635 else
4636 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4637 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4638
4639 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4640
4641#else
4642# error "Port me"
4643#endif
4644
4645 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4646 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4647 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4648
4649 /* Free but don't flush the EFlags and tmp registers. */
4650 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4651 iemNativeRegFreeTmp(pReNative, idxEflReg);
4652
4653 /* Make a copy of the core state now as we start the if-block. */
4654 iemNativeCondStartIfBlock(pReNative, off);
4655
4656 return off;
4657}
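/*
 * Worked example of the bit-compare trick above (illustration; bit numbers per the
 * x86 EFLAGS layout): for something like IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF),
 * with SF being bit 7 and OF bit 11, the code isolates SF (and tmp, efl, #RT_BIT_32(7)),
 * shifts it left by 11 - 7 = 4 so it lines up with OF, and XORs the result with EFLAGS.
 * Bit 11 of the result is then set exactly when SF != OF, so testing that single bit
 * decides whether to fall into the if-block or jump to the else label.
 */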
4658
4659
4660#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4661 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4662 do {
4663
4664#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4665 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4666 do {
4667
4668/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4669 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4670DECL_INLINE_THROW(uint32_t)
4671iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4672 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4673{
4674 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4675
4676 /* We need an if-block label for the non-inverted variant. */
4677 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4678 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4679
4680 /* Get the eflags. */
4681 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4682 kIemNativeGstRegUse_ReadOnly);
4683
4684 /* Translate the flag masks to bit numbers. */
4685 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4686 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4687
4688 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4689 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4690 Assert(iBitNo1 != iBitNo);
4691
4692 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4693 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4694 Assert(iBitNo2 != iBitNo);
4695 Assert(iBitNo2 != iBitNo1);
4696
4697#ifdef RT_ARCH_AMD64
4698 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4699#elif defined(RT_ARCH_ARM64)
4700 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4701#endif
4702
4703 /* Check for the lone bit first. */
4704 if (!fInverted)
4705 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4706 else
4707 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4708
4709 /* Then extract and compare the other two bits. */
4710#ifdef RT_ARCH_AMD64
4711 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4712 if (iBitNo1 > iBitNo2)
4713 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4714 else
4715 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4716 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4717
4718#elif defined(RT_ARCH_ARM64)
4719 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4720
4721 /* and tmpreg, eflreg, #1<<iBitNo1 */
4722 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4723
4724 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4725 if (iBitNo1 > iBitNo2)
4726 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4727 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4728 else
4729 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4730 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4731
4732 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4733
4734#else
4735# error "Port me"
4736#endif
4737
4738 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4739 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4740 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4741
4742 /* Free but don't flush the EFlags and tmp registers. */
4743 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4744 iemNativeRegFreeTmp(pReNative, idxEflReg);
4745
4746 /* Make a copy of the core state now as we start the if-block. */
4747 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4748
4749 return off;
4750}
4751
4752
4753#define IEM_MC_IF_CX_IS_NZ() \
4754 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4755 do {
4756
4757/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4758DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4759{
4760 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4761
4762 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4763 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4764 kIemNativeGstRegUse_ReadOnly);
4765 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4766 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4767
4768 iemNativeCondStartIfBlock(pReNative, off);
4769 return off;
4770}
4771
4772
4773#define IEM_MC_IF_ECX_IS_NZ() \
4774 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4775 do {
4776
4777#define IEM_MC_IF_RCX_IS_NZ() \
4778 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4779 do {
4780
4781/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4782DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4783{
4784 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4785
4786 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4787 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4788 kIemNativeGstRegUse_ReadOnly);
4789 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4790 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4791
4792 iemNativeCondStartIfBlock(pReNative, off);
4793 return off;
4794}
4795
4796
4797#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4798 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4799 do {
4800
4801#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4802 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4803 do {
4804
4805/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and
4806 * IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4806DECL_INLINE_THROW(uint32_t)
4807iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4808{
4809 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4810
4811 /* We have to load both RCX and EFLAGS before we can start branching,
4812 otherwise we'll end up in the else-block with an inconsistent
4813 register allocator state.
4814 Doing EFLAGS first as it's more likely to be loaded, right? */
4815 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4816 kIemNativeGstRegUse_ReadOnly);
4817 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4818 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4819 kIemNativeGstRegUse_ReadOnly);
4820
4821 /** @todo we could reduce this to a single branch instruction by spending a
4822 * temporary register and some setnz stuff. Not sure if loops are
4823 * worth it. */
4824 /* Check CX. */
4825 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4826
4827 /* Check the EFlags bit. */
4828 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4829 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4830 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4831 !fCheckIfSet /*fJmpIfSet*/);
4832
4833 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4834 iemNativeRegFreeTmp(pReNative, idxEflReg);
4835
4836 iemNativeCondStartIfBlock(pReNative, off);
4837 return off;
4838}
4839
4840
4841#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4842 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
4843 do {
4844
4845#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4846 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
4847 do {
4848
4849#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4850 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
4851 do {
4852
4853#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4854 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
4855 do {
4856
4857/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
4858 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
4859 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
4860 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4861DECL_INLINE_THROW(uint32_t)
4862iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4863 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
4864{
4865 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4866
4867 /* We have to load both RCX and EFLAGS before we can start branching,
4868 otherwise we'll end up in the else-block with an inconsistent
4869 register allocator state.
4870 Doing EFLAGS first as it's more likely to be loaded, right? */
4871 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4872 kIemNativeGstRegUse_ReadOnly);
4873 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4874 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4875 kIemNativeGstRegUse_ReadOnly);
4876
4877 /** @todo we could reduce this to a single branch instruction by spending a
4878 * temporary register and some setnz stuff. Not sure if loops are
4879 * worth it. */
4880 /* Check RCX/ECX. */
4881 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4882
4883 /* Check the EFlags bit. */
4884 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4885 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4886 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4887 !fCheckIfSet /*fJmpIfSet*/);
4888
4889 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4890 iemNativeRegFreeTmp(pReNative, idxEflReg);
4891
4892 iemNativeCondStartIfBlock(pReNative, off);
4893 return off;
4894}
4895
4896
4897
4898/*********************************************************************************************************************************
4899* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
4900*********************************************************************************************************************************/
4901/** Number of hidden arguments for CIMPL calls.
4902 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
4903#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4904# define IEM_CIMPL_HIDDEN_ARGS 3
4905#else
4906# define IEM_CIMPL_HIDDEN_ARGS 2
4907#endif
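/*
 * Note on the hidden arguments (see iemNativeEmitCallCImplCommon further down):
 * hidden argument #0 is pVCpu and #1 is cbInstr.  On Windows/AMD64 with
 * VBOXSTRICTRC_STRICT_ENABLED an extra hidden argument is prepended, a pointer to
 * a stack slot receiving the VBOXSTRICTRC return value, pushing pVCpu and cbInstr
 * to #1 and #2, which is why the count above is 3 in that configuration.
 */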
4908
4909#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
4910 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
4911
4912#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
4913 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
4914
4915#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
4916 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
4917
4918#define IEM_MC_LOCAL(a_Type, a_Name) \
4919 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
4920
4921#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
4922 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
4923
4924
4925/**
4926 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
4927 */
4928DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
4929{
4930 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
4931 return IEM_CIMPL_HIDDEN_ARGS;
4932 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
4933 return 1;
4934 return 0;
4935}
4936
4937
4938/**
4939 * Internal work that allocates a variable with kind set to
4940 * kIemNativeVarKind_Invalid and no current stack allocation.
4941 *
4942 * The kind will either be set by the caller or later when the variable is first
4943 * assigned a value.
4944 */
4945static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
4946{
4947 Assert(cbType > 0 && cbType <= 64);
4948 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
4949 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
4950 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
4951 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
4952 pReNative->Core.aVars[idxVar].cbVar = cbType;
4953 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
4954 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4955 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
4956 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
4957 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
4958 pReNative->Core.aVars[idxVar].u.uValue = 0;
4959 return idxVar;
4960}
4961
4962
4963/**
4964 * Internal work that allocates an argument variable w/o setting enmKind.
4965 */
4966static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
4967{
4968 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
4969 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
4970 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
4971
4972 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
4973 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
4974 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
4975 return idxVar;
4976}
4977
4978
4979/**
4980 * Changes the variable to a stack variable.
4981 *
4982 * Currently this is only possible the first time the variable is used;
4983 * switching later can be implemented but hasn't been done.
4984 *
4985 * @param pReNative The recompiler state.
4986 * @param idxVar The variable.
4987 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
4988 */
4989static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4990{
4991 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
4992 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4993 {
4994 /* We could in theory transition from immediate to stack as well, but it
4995 would involve the caller doing work storing the value on the stack. So,
4996 till that's required we only allow transition from invalid. */
4997 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
4998 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
4999 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
5000
5001 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
5002 {
5003 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
5004 {
5005 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
5006 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5007 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
5008 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5009 return;
5010 }
5011 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7; */
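 /* Worked example: for cbVar == 32, fBitAlignMask below evaluates to
    RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3, i.e. the slot
    index must be a multiple of 4, and fBitAllocMask becomes
    RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, i.e. four consecutive 8-byte slots. */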
5012 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
5013 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
5014 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
5015 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
5016 uint32_t bmStack = ~pReNative->Core.bmStack;
5017 while (bmStack != UINT32_MAX)
5018 {
5019 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
5020 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5021 if (!(iSlot & fBitAlignMask))
5022 {
5023 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
5024 {
5025 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
5026 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5027 return;
5028 }
5029 }
5030 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
5031 }
5032 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5033 }
5034 }
5035}
5036
5037
5038/**
5039 * Changes it to a variable with a constant value.
5040 *
5041 * This does not require stack storage as we know the value and can always
5042 * reload it, unless of course it's referenced.
5043 *
5044 * @param pReNative The recompiler state.
5045 * @param idxVar The variable.
5046 * @param uValue The immediate value.
5047 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5048 */
5049static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5050{
5051 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5052 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5053 {
5054 /* Only simple transitions for now. */
5055 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5056 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5057 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5058 }
5059 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5060}
5061
5062
5063/**
5064 * Changes the variable to a reference (pointer) to @a idxOtherVar.
5065 *
5066 * @param pReNative The recompiler state.
5067 * @param idxVar The variable.
5068 * @param idxOtherVar The variable to take the (stack) address of.
5069 *
5070 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5071 */
5072static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5073{
5074 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5075 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5076
5077 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5078 {
5079 /* Only simple transitions for now. */
5080 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5081 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5082 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5083 }
5084 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5085
5086 /* Update the other variable, ensure it's a stack variable. */
5087 /** @todo handle variables with const values... that'll go boom now. */
5088 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5089 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5090}
5091
5092
5093DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5094{
5095 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5096}
5097
5098
5099DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5100{
5101 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5102 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5103 return idxVar;
5104}
5105
5106
5107DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5108{
5109 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5110 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5111 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5112 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5113
5114 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5115 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5116 return idxArgVar;
5117}
5118
5119
5120DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5121{
5122 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5123 iemNativeVarSetKindToStack(pReNative, idxVar);
5124 return idxVar;
5125}
5126
5127
5128DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5129{
5130 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5131 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5132 return idxVar;
5133}
5134
5135
5136/**
5137 * Makes sure variable @a idxVar has a register assigned to it.
5138 *
5139 * @returns The host register number.
5140 * @param pReNative The recompiler state.
5141 * @param idxVar The variable.
5142 * @param poff Pointer to the instruction buffer offset.
5143 * In case a register needs to be freed up.
5144 */
5145DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5146{
5147 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5148
5149 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5150 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5151 return idxReg;
5152
5153 /*
5154 * We have to allocate a register for the variable, even if it's a stack one,
5155 * as we don't know whether there are modifications being made to it before it's
5156 * finalized (todo: analyze and insert hints about that?).
5157 *
5158 * If we can, we try to get the correct register for argument variables. This
5159 * assumes that most argument variables are fetched as close as possible
5160 * to the actual call, so that there aren't any interfering hidden calls
5161 * (memory accesses, etc.) in between.
5162 *
5163 * If we cannot, or it's a plain (local) variable, we make sure no argument
5164 * registers that will be used by this MC block are allocated here, and we
5165 * always prefer non-volatile registers to avoid needing to spill stuff for
5166 * internal calls.
5167 */
5168 /** @todo Detect too early argument value fetches and warn about hidden
5169 * calls causing less optimal code to be generated in the python script. */
5170
5171 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5172 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5173 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5174 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5175 else
5176 {
5177 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5178 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5179 & ~pReNative->Core.bmHstRegsWithGstShadow
5180 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5181 & fNotArgsMask;
5182 if (fRegs)
5183 {
5184 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
5185 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5186 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5187 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5188 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5189 }
5190 else
5191 {
5192 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5193 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5194 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5195 }
5196 }
5197 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5198 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5199 return idxReg;
5200}
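/*
 * Illustration of the allocation preferences above (hypothetical example): a
 * variable declared via IEM_MC_ARG(uint16_t, u16Value, 1) is preferably placed
 * straight into g_aidxIemNativeCallRegs[1 + <hidden argument count>], i.e. the
 * host register that will carry call argument #1, so no move is needed at call
 * time.  Local variables (uArgNo == UINT8_MAX) instead get a free non-fixed,
 * non-argument register, preferring non-volatile ones so they survive internal
 * calls; only when nothing is free does iemNativeRegAllocFindFree() spill something.
 */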
5201
5202
5203
5204/*********************************************************************************************************************************
5205* Emitters for IEM_MC_CALL_CIMPL_XXX *
5206*********************************************************************************************************************************/
5207
5208/**
5209 * Emits code to load a reference to the given guest register into @a idxGprDst.
5210 */
5211DECL_INLINE_THROW(uint32_t)
5212iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5213 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5214{
5215 /*
5216 * Get the offset relative to the CPUMCTX structure.
5217 */
5218 uint32_t offCpumCtx;
5219 switch (enmClass)
5220 {
5221 case kIemNativeGstRegRef_Gpr:
5222 Assert(idxRegInClass < 16);
5223 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5224 break;
5225
5226 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
5227 Assert(idxRegInClass < 4);
5228 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5229 break;
5230
5231 case kIemNativeGstRegRef_EFlags:
5232 Assert(idxRegInClass == 0);
5233 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5234 break;
5235
5236 case kIemNativeGstRegRef_MxCsr:
5237 Assert(idxRegInClass == 0);
5238 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5239 break;
5240
5241 case kIemNativeGstRegRef_FpuReg:
5242 Assert(idxRegInClass < 8);
5243 AssertFailed(); /** @todo what kind of indexing? */
5244 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5245 break;
5246
5247 case kIemNativeGstRegRef_MReg:
5248 Assert(idxRegInClass < 8);
5249 AssertFailed(); /** @todo what kind of indexing? */
5250 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5251 break;
5252
5253 case kIemNativeGstRegRef_XReg:
5254 Assert(idxRegInClass < 16);
5255 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5256 break;
5257
5258 default:
5259 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5260 }
5261
5262 /*
5263 * Load the value into the destination register.
5264 */
5265#ifdef RT_ARCH_AMD64
5266 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5267
5268#elif defined(RT_ARCH_ARM64)
5269 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5270 Assert(offCpumCtx < 4096);
5271 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5272
5273#else
5274# error "Port me!"
5275#endif
5276
5277 return off;
5278}
5279
5280
5281/**
5282 * Common code for CIMPL and AIMPL calls.
5283 *
5284 * These are calls that use argument variables and such. They should not be
5285 * confused with internal calls required to implement an MC operation,
5286 * like a TLB load and similar.
5287 *
5288 * Upon return all that is left to do is to load any hidden arguments and
5289 * perform the call. All argument variables are freed.
5290 *
5291 * @returns New code buffer offset; throws VBox status code on error.
5292 * @param pReNative The native recompile state.
5293 * @param off The code buffer offset.
5294 * @param cArgs The total number of arguments (including the
5295 * hidden ones).
5296 * @param cHiddenArgs The number of hidden arguments. The hidden
5297 * arguments must not have any variable declared for
5298 * them, whereas all the regular arguments must
5299 * (tstIEMCheckMc ensures this).
5300 */
5301DECL_HIDDEN_THROW(uint32_t)
5302iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5303{
5304#ifdef VBOX_STRICT
5305 /*
5306 * Assert sanity.
5307 */
5308 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5309 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5310 for (unsigned i = 0; i < cHiddenArgs; i++)
5311 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5312 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5313 {
5314 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5315 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5316 }
5317#endif
5318
5319 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
5320
5321 /*
5322 * First, go over the host registers that will be used for arguments and make
5323 * sure they either hold the desired argument or are free.
5324 */
5325 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
5326 for (uint32_t i = 0; i < cRegArgs; i++)
5327 {
5328 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5329 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5330 {
5331 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
5332 {
5333 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
5334 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5335 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
5336 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5337 if (uArgNo == i)
5338 { /* perfect */ }
5339 else
5340 {
5341 /* The variable allocator logic should make sure this is impossible. */
5342 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
5343
5344 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5345 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5346 else
5347 {
5348 /* just free it, can be reloaded if used again */
5349 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5350 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
5351 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
5352 }
5353 }
5354 }
5355 else
5356 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
5357 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
5358 }
5359 }
5360
5361 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
5362
5363 /*
5364 * Make sure the argument variables are loaded into their respective registers.
5365 *
5366 * We can optimize this by ASSUMING that any register allocations are for
5367 * registers that have already been loaded and are ready. The previous step
5368 * saw to that.
5369 */
5370 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
5371 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5372 {
5373 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5374 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5375 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
5376 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
5377 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
5378 else
5379 {
5380 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5381 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5382 {
5383 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5384 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
5385 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
5386 | RT_BIT_32(idxArgReg);
5387 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
5388 }
5389 else
5390 {
5391 /* Use ARG0 as temp for stuff we need registers for. */
5392 switch (pReNative->Core.aVars[idxVar].enmKind)
5393 {
5394 case kIemNativeVarKind_Stack:
5395 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5396 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5397 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg,
5398 IEMNATIVE_FP_OFF_STACK_VARS
5399 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5400 continue;
5401
5402 case kIemNativeVarKind_Immediate:
5403 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
5404 continue;
5405
5406 case kIemNativeVarKind_VarRef:
5407 {
5408 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5409 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5410 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5411 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5412 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
5413 IEMNATIVE_FP_OFF_STACK_VARS
5414 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5415 continue;
5416 }
5417
5418 case kIemNativeVarKind_GstRegRef:
5419 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
5420 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5421 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5422 continue;
5423
5424 case kIemNativeVarKind_Invalid:
5425 case kIemNativeVarKind_End:
5426 break;
5427 }
5428 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5429 }
5430 }
5431 }
5432#ifdef VBOX_STRICT
5433 else
5434 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5435 {
5436 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
5437 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
5438 }
5439#endif
5440
5441#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
5442 /*
5443 * If there are any stack arguments, make sure they are in their place as well.
5444 *
5445 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since the
5446 * caller will load it later and it must be free (see the first loop).
5447 */
5448 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
5449 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
5450 {
5451 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5452 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
5453 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5454 {
5455 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
5456 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
5457 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
5458 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5459 }
5460 else
5461 {
5462 /* Use ARG0 as temp for stuff we need registers for. */
5463 switch (pReNative->Core.aVars[idxVar].enmKind)
5464 {
5465 case kIemNativeVarKind_Stack:
5466 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5467 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5468 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
5469 IEMNATIVE_FP_OFF_STACK_VARS
5470 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5471 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5472 continue;
5473
5474 case kIemNativeVarKind_Immediate:
5475 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
5476 continue;
5477
5478 case kIemNativeVarKind_VarRef:
5479 {
5480 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5481 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5482 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5483 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5484 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5485 IEMNATIVE_FP_OFF_STACK_VARS
5486 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5487 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5488 continue;
5489 }
5490
5491 case kIemNativeVarKind_GstRegRef:
5492 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5493 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5494 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5495 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5496 continue;
5497
5498 case kIemNativeVarKind_Invalid:
5499 case kIemNativeVarKind_End:
5500 break;
5501 }
5502 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5503 }
5504 }
5505#else
5506 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
5507#endif
5508
5509 /*
5510 * Free all argument variables (simplified).
5511 * Their lifetime always expires with the call they are for.
5512 */
5513 /** @todo Make the python script check that arguments aren't used after
5514 * IEM_MC_CALL_XXXX. */
5515    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
5516     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
5517     *        an argument value. */
5518 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
5519 {
5520 uint8_t idxVar = pReNative->Core.aidxArgVars[i];
5521 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5522 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
5523 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5524 }
5525 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5526
5527 /*
5528 * Flush volatile registers as we make the call.
5529 */
5530 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
5531
5532 return off;
5533}
5534
5535
5536/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
5537DECL_HIDDEN_THROW(uint32_t)
5538iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5539 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
5540
5541{
5542 /*
5543 * Do all the call setup and cleanup.
5544 */
5545 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
5546
5547 /*
5548 * Load the two hidden arguments.
5549 */
5550#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5551 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5552 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5553 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
5554#else
5555 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5556 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
5557#endif
5558
5559 /*
5560 * Make the call and check the return code.
5561 *
5562     * Shadow PC copies are always flushed here; other stuff depends on flags.
5563     * Segment and general purpose registers are explicitly flushed via the
5564     * fGstShwFlush mask supplied by the IEM_MC_CALL_CIMPL_X macros (which
5565     * replaced the earlier IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG/SREG hints).
5566 */
5567 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
5568#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5569 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5570#endif
5571 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
5572 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
5573 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5574 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5575
5576 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5577}
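/*
 * Illustrative sketch: at runtime the code emitted above behaves roughly like
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, arg0, ..., argN);
 *
 * followed by the return code check from iemNativeEmitCheckCallRetAndPassUp,
 * which presumably routes non-zero statuses to the NonZeroRetOrPassUp handling.
 * On Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED, VBOXSTRICTRC is a class
 * that gets returned via a hidden by-reference first argument, which is why
 * ARG0 is pointed at a shadow stack slot above and the 32-bit status is read
 * back from that slot after the call.
 */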
5578
5579
5580#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5581 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
5582
5583/** Emits code for IEM_MC_CALL_CIMPL_1. */
5584DECL_INLINE_THROW(uint32_t)
5585iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5586 uintptr_t pfnCImpl, uint8_t idxArg0)
5587{
5588 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5589 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5590 RT_NOREF_PV(idxArg0);
5591
5592 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
5593}
5594
5595
5596#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5597 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
5598
5599/** Emits code for IEM_MC_CALL_CIMPL_2. */
5600DECL_INLINE_THROW(uint32_t)
5601iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5602 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
5603{
5604 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5605 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5606 RT_NOREF_PV(idxArg0);
5607
5608 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5609 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5610 RT_NOREF_PV(idxArg1);
5611
5612 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
5613}
5614
5615
5616#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5617 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5618 (uintptr_t)a_pfnCImpl, a0, a1, a2)
5619
5620/** Emits code for IEM_MC_CALL_CIMPL_3. */
5621DECL_INLINE_THROW(uint32_t)
5622iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5623 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
5624{
5625pReNative->pInstrBuf[off++] = 0xcc; /* 0xcc = int3 on AMD64 hosts; presumably a temporary debugging aid. */
5626 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5627 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5628 RT_NOREF_PV(idxArg0);
5629
5630 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5631 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5632 RT_NOREF_PV(idxArg1);
5633
5634 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5635 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5636 RT_NOREF_PV(idxArg2);
5637
5638 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
5639}
5640
5641
5642#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
5643 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5644 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
5645
5646/** Emits code for IEM_MC_CALL_CIMPL_4. */
5647DECL_INLINE_THROW(uint32_t)
5648iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5649 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
5650{
5651pReNative->pInstrBuf[off++] = 0xcc; /* 0xcc = int3 on AMD64 hosts; presumably a temporary debugging aid. */
5652 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5653 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5654 RT_NOREF_PV(idxArg0);
5655
5656 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5657 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5658 RT_NOREF_PV(idxArg1);
5659
5660 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5661 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5662 RT_NOREF_PV(idxArg2);
5663
5664 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5665 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5666 RT_NOREF_PV(idxArg3);
5667
5668 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
5669}
5670
5671
5672#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
5673 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5674 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
5675
5676/** Emits code for IEM_MC_CALL_CIMPL_5. */
5677DECL_INLINE_THROW(uint32_t)
5678iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5679 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
5680{
5681pReNative->pInstrBuf[off++] = 0xcc; /* 0xcc = int3 on AMD64 hosts; presumably a temporary debugging aid. */
5682 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5683 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5684 RT_NOREF_PV(idxArg0);
5685
5686 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5687 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5688 RT_NOREF_PV(idxArg1);
5689
5690 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5691 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5692 RT_NOREF_PV(idxArg2);
5693
5694 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5695 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5696 RT_NOREF_PV(idxArg3);
5697
5698 Assert(idxArg4 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg4)));
5699 Assert(pReNative->Core.aVars[idxArg4].uArgNo == 4 + IEM_CIMPL_HIDDEN_ARGS);
5700 RT_NOREF_PV(idxArg4);
5701
5702 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
5703}
5704
5705
5706/** Recompiler debugging: Flush guest register shadow copies. */
5707#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
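/* Usage sketch (illustrative): to flush the shadow copy of guest RAX while debugging
   a recompiled block, something like
        IEM_MC_HINT_FLUSH_GUEST_SHADOW(RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX))
   can be dropped into an MC block; any combination of kIemNativeGstReg_XXX bits works. */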
5708
5709
5710
5711
5712/*********************************************************************************************************************************
5713* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
5714*********************************************************************************************************************************/
5715
5716#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
5717 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
5718
5719/** Emits code for IEM_MC_FETCH_GREG_U16. */
5720DECL_INLINE_THROW(uint32_t)
5721iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
5722{
5723 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
5724 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
5725
5726 /*
5727     * We can either just load the low 16 bits of the GPR into a host register
5728     * for the variable, or we can do so via a shadow copy host register. The
5729     * latter will avoid having to reload it if it's being stored later, but
5730     * will waste a host register if it isn't touched again. Since we don't
5731     * know what's going to happen, we choose the latter for now.
5732 */
5733 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5734 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5735 kIemNativeGstRegUse_ReadOnly);
5736
5737 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5738 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
5739 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
5740
5741 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
5742 return off;
5743}
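/* Illustrative example: for IEM_MC_FETCH_GREG_U16(u16Tmp, X86_GREG_xCX) this typically
   results in a load of the guest RCX value from cpum.GstCtx (unless it is already
   shadowed in a host register) followed by a 16-bit zero-extending move (movzx /
   uxth style) into the host register picked for the variable; the exact instructions
   depend on the allocator state and the host architecture. */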
5744
5745
5746
5747/*********************************************************************************************************************************
5748* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
5749*********************************************************************************************************************************/
5750
5751#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
5752 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
5753
5754/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
5755DECL_INLINE_THROW(uint32_t)
5756iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
5757{
5758 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5759 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + (iGRegEx & 15)),
5760 kIemNativeGstRegUse_ForUpdate);
5761#ifdef RT_ARCH_AMD64
5762 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5763
5764 /* To the lowest byte of the register: mov r8, imm8 */
5765 if (iGRegEx < 16)
5766 {
5767 if (idxGstTmpReg >= 8)
5768 pbCodeBuf[off++] = X86_OP_REX_B;
5769 else if (idxGstTmpReg >= 4)
5770 pbCodeBuf[off++] = X86_OP_REX;
5771 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5772 pbCodeBuf[off++] = u8Value;
5773 }
5774    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
5775 else if (idxGstTmpReg < 4)
5776 {
5777 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
5778 pbCodeBuf[off++] = u8Value;
5779 }
5780 else
5781 {
5782 /* ror reg64, 8 */
5783 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5784 pbCodeBuf[off++] = 0xc1;
5785 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5786 pbCodeBuf[off++] = 8;
5787
5788 /* mov reg8, imm8 */
5789 if (idxGstTmpReg >= 8)
5790 pbCodeBuf[off++] = X86_OP_REX_B;
5791 else if (idxGstTmpReg >= 4)
5792 pbCodeBuf[off++] = X86_OP_REX;
5793 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5794 pbCodeBuf[off++] = u8Value;
5795
5796 /* rol reg64, 8 */
5797 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5798 pbCodeBuf[off++] = 0xc1;
5799 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5800 pbCodeBuf[off++] = 8;
5801 }
5802
5803#elif defined(RT_ARCH_ARM64)
5804 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
5805 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5806 if (iGRegEx < 16)
5807 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
5808 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
5809 else
5810 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
5811 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
5812 iemNativeRegFreeTmp(pReNative, idxImmReg);
5813
5814#else
5815# error "Port me!"
5816#endif
5817
5818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5819
5820 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
5821
5822 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5823 return off;
5824}
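/* Worked example (AMD64 host, illustrative): storing 0x42 into a high byte register
   (iGRegEx >= 16) whose guest GPR happens to be shadowed in rsi takes the rotate
   path above and emits
        48 C1 CE 08     ror rsi, 8
        40 B6 42        mov sil, 0x42
        48 C1 C6 08     rol rsi, 8
   i.e. the target byte is rotated into the low position, patched and rotated back. */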
5825
5826
5827/*
5828 * General purpose register manipulation (add, sub).
5829 */
5830
5831#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5832 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5833
5834/** Emits code for IEM_MC_SUB_GREG_U16. */
5835DECL_INLINE_THROW(uint32_t)
5836iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5837{
5838 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5839 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5840 kIemNativeGstRegUse_ForUpdate);
5841
5842#ifdef RT_ARCH_AMD64
5843    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5844 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5845 if (idxGstTmpReg >= 8)
5846 pbCodeBuf[off++] = X86_OP_REX_B;
5847    if (uSubtrahend == 1)
5848    {
5849        pbCodeBuf[off++] = 0xff; /* dec r/m16 */
5850        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5851    }
5852    else
5853    {
5854        pbCodeBuf[off++] = 0x81; /* sub r/m16, imm16 */
5855        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5856        pbCodeBuf[off++] = uSubtrahend;
5857        pbCodeBuf[off++] = 0;
5858    }
5859
5860#else
5861 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5862 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5863
5864 /* sub tmp, gstgrp, uSubtrahend */
5865 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5866
5867 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5868 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5869
5870 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5871#endif
5872
5873 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5874
5875 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5876
5877 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5878 return off;
5879}
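/* Worked example (AMD64 host, illustrative): with the guest GPR shadowed in rsi,
   the above emits
        66 FF CE            dec si          ; uSubtrahend == 1
        66 81 EE 02 00      sub si, 2       ; uSubtrahend == 2
   before the result is written back to cpum.GstCtx.aGRegs[iGReg] as a full 64-bit
   store (only the low 16 bits were modified). */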
5880
5881
5882#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5883 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5884
5885#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5886 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5887
5888/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5889DECL_INLINE_THROW(uint32_t)
5890iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5891{
5892 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5893 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5894 kIemNativeGstRegUse_ForUpdate);
5895
5896#ifdef RT_ARCH_AMD64
5897 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5898 if (f64Bit)
5899 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5900 else if (idxGstTmpReg >= 8)
5901 pbCodeBuf[off++] = X86_OP_REX_B;
5902 if (uSubtrahend == 1)
5903 {
5904 /* dec */
5905 pbCodeBuf[off++] = 0xff;
5906 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5907 }
5908 else if (uSubtrahend < 128)
5909 {
5910 pbCodeBuf[off++] = 0x83; /* sub */
5911 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5912 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5913 }
5914 else
5915 {
5916 pbCodeBuf[off++] = 0x81; /* sub */
5917 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5918 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5919 pbCodeBuf[off++] = 0;
5920 pbCodeBuf[off++] = 0;
5921 pbCodeBuf[off++] = 0;
5922 }
5923
5924#else
5925 /* sub tmp, gstgrp, uSubtrahend */
5926 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5927 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5928
5929#endif
5930
5931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5932
5933 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5934
5935 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5936 return off;
5937}
5938
5939
5940
5941/*********************************************************************************************************************************
5942* Builtin functions *
5943*********************************************************************************************************************************/
5944
5945/**
5946 * Built-in function that calls a C-implementation function taking zero arguments.
5947 */
5948static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
5949{
5950 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
5951 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
5952    uint64_t const     fGstShwFlush = pCallEntry->auParams[2];
5953 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
5954}
5955
5956
5957/**
5958 * Built-in function that checks for pending interrupts that can be delivered or
5959 * forced action flags.
5960 *
5961 * This triggers after the completion of an instruction, so EIP is already at
5962 * the next instruction. If an IRQ or important FF is pending, this will return
5963 * a non-zero status that stops TB execution.
5964 */
5965static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
5966{
5967 RT_NOREF(pCallEntry);
5968
5969 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
5970 and I'm too lazy to create a 'Fixed' version of that one. */
5971 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
5972 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
5973
5974 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
5975
5976 /* Again, we need to load the extended EFLAGS before we actually need them
5977 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
5978 loaded them inside the check, as the shadow state would not be correct
5979 when the code branches before the load. Ditto PC. */
5980 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5981 kIemNativeGstRegUse_ReadOnly);
5982
5983 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
5984
5985 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5986
5987 /*
5988 * Start by checking the local forced actions of the EMT we're on for IRQs
5989     * and other FFs that need servicing.
5990 */
5991 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
5992 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
5993 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
5994 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5995 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
5996 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
5997 | VMCPU_FF_TLB_FLUSH
5998 | VMCPU_FF_UNHALT ),
5999 true /*fSetFlags*/);
6000    /* If we end up with ZERO in idxTmpReg there is nothing to do. */
6001 uint32_t const offFixupJumpToVmCheck1 = off;
6002 off = iemNativeEmitJzToFixed(pReNative, off, 0);
6003
6004    /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
6005       these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
6006 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
6007 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
6008 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
6009 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6010
6011    /* So, it's only interrupt related FFs and we need to see if IRQs are being
6012 suppressed by the CPU or not. */
6013 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
6014 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
6015 idxLabelReturnBreak);
6016
6017 /* We've got shadow flags set, so we must check that the PC they are valid
6018 for matches our current PC value. */
6019 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
6020 * a register. */
6021 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
6022 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
6023
6024 /*
6025 * Now check the force flags of the VM.
6026 */
6027 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
6028 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
6029 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
6030 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
6031 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
6032 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6033
6034 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
6035
6036 /*
6037 * We're good, no IRQs or FFs pending.
6038 */
6039 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6040 iemNativeRegFreeTmp(pReNative, idxEflReg);
6041 iemNativeRegFreeTmp(pReNative, idxPcReg);
6042
6043 return off;
6044}
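/*
 * Rough equivalent of the code emitted above (illustrative pseudo-C only):
 *
 *      uint64_t fFFs = pVCpu->fLocalForcedActions & VMCPU_FF_ALL_MASK
 *                    & ~(VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_TLB_FLUSH | VMCPU_FF_UNHALT);
 *      if (fFFs)
 *      {
 *          if (fFFs & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
 *              return VINF_IEM_REEXEC_BREAK;
 *          if (fEFlags & X86_EFL_IF)
 *          {
 *              if (!(fEFlags & CPUMCTX_INHIBIT_SHADOW))
 *                  return VINF_IEM_REEXEC_BREAK;
 *              if (pVCpu->cpum.GstCtx.uRipInhibitInt != uPc)
 *                  return VINF_IEM_REEXEC_BREAK;
 *          }
 *      }
 *      if (pVM->fGlobalForcedActions & VM_FF_ALL_MASK)
 *          return VINF_IEM_REEXEC_BREAK;
 *
 * where fEFlags is the extended EFLAGS value (including the inhibit bits) and uPc
 * the current guest RIP; the 'return' statements correspond to jumps to ReturnBreak.
 */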
6045
6046
6047/**
6048 * Built-in function checks if IEMCPU::fExec has the expected value.
6049 */
6050static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
6051{
6052 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
6053 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6054
6055 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6056 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
6057 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
6058 kIemNativeLabelType_ReturnBreak);
6059 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6060 return off;
6061}
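/* Roughly equivalent C (illustrative):
        if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
            return VINF_IEM_REEXEC_BREAK;   // via the ReturnBreak label
   i.e. the TB is abandoned if the execution mode key no longer matches. */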
6062
6063
6064
6065/*********************************************************************************************************************************
6066* The native code generator functions for each MC block. *
6067*********************************************************************************************************************************/
6068
6069
6070/*
6071 * Include g_apfnIemNativeRecompileFunctions and associated functions.
6072 *
6073 * This should probably live in its own file later, but let's see what the
6074 * compile times turn out to be first.
6075 */
6076#include "IEMNativeFunctions.cpp.h"
6077
6078
6079
6080/*********************************************************************************************************************************
6081* Recompiler Core. *
6082*********************************************************************************************************************************/
6083
6084
6085/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
6086static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
6087{
6088 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
6089 pDis->cbCachedInstr += cbMaxRead;
6090 RT_NOREF(cbMinRead);
6091 return VERR_NO_DATA;
6092}
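/* Note: zero filling and returning VERR_NO_DATA presumably ensures the disassembler
   only ever decodes the opcode bytes that were prefetched from pTb->pabOpcodes and
   never tries to read beyond them. */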
6093
6094
6095/**
6096 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
6097 * @returns pszBuf.
6098 * @param fFlags The flags.
6099 * @param pszBuf The output buffer.
6100 * @param cbBuf The output buffer size. At least 32 bytes.
6101 */
6102DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
6103{
6104 Assert(cbBuf >= 32);
6105 static RTSTRTUPLE const s_aModes[] =
6106 {
6107 /* [00] = */ { RT_STR_TUPLE("16BIT") },
6108 /* [01] = */ { RT_STR_TUPLE("32BIT") },
6109 /* [02] = */ { RT_STR_TUPLE("!2!") },
6110 /* [03] = */ { RT_STR_TUPLE("!3!") },
6111 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
6112 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
6113 /* [06] = */ { RT_STR_TUPLE("!6!") },
6114 /* [07] = */ { RT_STR_TUPLE("!7!") },
6115 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
6116 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
6117 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
6118 /* [0b] = */ { RT_STR_TUPLE("!b!") },
6119 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
6120 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
6121 /* [0e] = */ { RT_STR_TUPLE("!e!") },
6122 /* [0f] = */ { RT_STR_TUPLE("!f!") },
6123 /* [10] = */ { RT_STR_TUPLE("!10!") },
6124 /* [11] = */ { RT_STR_TUPLE("!11!") },
6125 /* [12] = */ { RT_STR_TUPLE("!12!") },
6126 /* [13] = */ { RT_STR_TUPLE("!13!") },
6127 /* [14] = */ { RT_STR_TUPLE("!14!") },
6128 /* [15] = */ { RT_STR_TUPLE("!15!") },
6129 /* [16] = */ { RT_STR_TUPLE("!16!") },
6130 /* [17] = */ { RT_STR_TUPLE("!17!") },
6131 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
6132 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
6133 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
6134 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
6135 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
6136 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
6137 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
6138 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
6139 };
6140 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
6141 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
6142 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
6143
6144 pszBuf[off++] = ' ';
6145 pszBuf[off++] = 'C';
6146 pszBuf[off++] = 'P';
6147 pszBuf[off++] = 'L';
6148 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
6149 Assert(off < 32);
6150
6151 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
6152
6153 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
6154 {
6155 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
6156 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
6157 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
6158 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
6159 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
6160 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
6161 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
6162 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
6163 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
6164 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
6165 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
6166 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
6167 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
6168 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
6169 };
6170 if (fFlags)
6171 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
6172 if (s_aFlags[i].fFlag & fFlags)
6173 {
6174 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
6175 pszBuf[off++] = ' ';
6176 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
6177 off += s_aFlags[i].cchName;
6178 fFlags &= ~s_aFlags[i].fFlag;
6179 if (!fFlags)
6180 break;
6181 }
6182 pszBuf[off] = '\0';
6183
6184 return pszBuf;
6185}
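/* Example output (illustrative): a 64-bit ring-0 native TB would be rendered as
   something like "64BIT CPL0 TYPE_NATIVE", while unrecognized mode encodings show
   up as the "!xx!" placeholders from the table above. */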
6186
6187
6188DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
6189{
6190 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
6191
6192 char szDisBuf[512];
6193 DISSTATE Dis;
6194 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
6195 uint32_t const cNative = pTb->Native.cInstructions;
6196 uint32_t offNative = 0;
6197#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6198 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
6199#endif
6200 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6201 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6202 : DISCPUMODE_64BIT;
6203#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6204 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
6205#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6206 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
6207#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6208# error "Port me"
6209#else
6210 csh hDisasm = ~(size_t)0;
6211# if defined(RT_ARCH_AMD64)
6212 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
6213# elif defined(RT_ARCH_ARM64)
6214 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
6215# else
6216# error "Port me"
6217# endif
6218 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
6219#endif
6220
6221 /*
6222 * Print TB info.
6223 */
6224 pHlp->pfnPrintf(pHlp,
6225 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
6226 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
6227 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
6228 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
6229#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6230 if (pDbgInfo && pDbgInfo->cEntries > 1)
6231 {
6232 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
6233
6234 /*
6235         * This disassembly is driven by the debug info which follows the native
6236         * code and indicates where the next guest instruction starts, where the
6237         * labels are, and such things.
6238 */
6239 uint32_t idxThreadedCall = 0;
6240 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
6241 uint8_t idxRange = UINT8_MAX;
6242 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
6243 uint32_t offRange = 0;
6244 uint32_t offOpcodes = 0;
6245 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
6246 uint32_t const cDbgEntries = pDbgInfo->cEntries;
6247 uint32_t iDbgEntry = 1;
6248 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
6249
6250 while (offNative < cNative)
6251 {
6252 /* If we're at or have passed the point where the next chunk of debug
6253 info starts, process it. */
6254 if (offDbgNativeNext <= offNative)
6255 {
6256 offDbgNativeNext = UINT32_MAX;
6257 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
6258 {
6259 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
6260 {
6261 case kIemTbDbgEntryType_GuestInstruction:
6262 {
6263 /* Did the exec flag change? */
6264 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
6265 {
6266 pHlp->pfnPrintf(pHlp,
6267 " fExec change %#08x -> %#08x %s\n",
6268 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6269 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6270 szDisBuf, sizeof(szDisBuf)));
6271 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
6272 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6273 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6274 : DISCPUMODE_64BIT;
6275 }
6276
6277                            /* New opcode range? We need to fend off a spurious debug info entry here for cases
6278 where the compilation was aborted before the opcode was recorded and the actual
6279 instruction was translated to a threaded call. This may happen when we run out
6280 of ranges, or when some complicated interrupts/FFs are found to be pending or
6281 similar. So, we just deal with it here rather than in the compiler code as it
6282 is a lot simpler to do up here. */
6283 if ( idxRange == UINT8_MAX
6284 || idxRange >= cRanges
6285 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
6286 {
6287 idxRange += 1;
6288 if (idxRange < cRanges)
6289 offRange = 0;
6290 else
6291 continue;
6292 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
6293 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
6294 + (pTb->aRanges[idxRange].idxPhysPage == 0
6295 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6296 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
6297 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6298 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
6299 pTb->aRanges[idxRange].idxPhysPage);
6300 }
6301
6302 /* Disassemble the instruction. */
6303 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
6304 uint32_t cbInstr = 1;
6305 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6306 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
6307 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6308 if (RT_SUCCESS(rc))
6309 {
6310 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6311 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6312 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6313 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6314
6315 static unsigned const s_offMarker = 55;
6316 static char const s_szMarker[] = " ; <--- guest";
6317 if (cch < s_offMarker)
6318 {
6319 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
6320 cch = s_offMarker;
6321 }
6322 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
6323 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
6324
6325 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
6326 }
6327 else
6328 {
6329 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
6330 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
6331 cbInstr = 1;
6332 }
6333 GCPhysPc += cbInstr;
6334 offOpcodes += cbInstr;
6335 offRange += cbInstr;
6336 continue;
6337 }
6338
6339 case kIemTbDbgEntryType_ThreadedCall:
6340 pHlp->pfnPrintf(pHlp,
6341 " Call #%u to %s (%u args)%s\n",
6342 idxThreadedCall,
6343 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6344 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6345 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
6346 idxThreadedCall++;
6347 continue;
6348
6349 case kIemTbDbgEntryType_GuestRegShadowing:
6350 {
6351 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
6352 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
6353 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
6354 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
6355 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6356 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
6357 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
6358 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
6359 else
6360 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
6361 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
6362 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6363 continue;
6364 }
6365
6366 case kIemTbDbgEntryType_Label:
6367 {
6368 const char *pszName = "what_the_fudge";
6369 const char *pszComment = "";
6370 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
6371 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
6372 {
6373 case kIemNativeLabelType_Return:
6374 pszName = "Return";
6375 break;
6376 case kIemNativeLabelType_ReturnBreak:
6377 pszName = "ReturnBreak";
6378 break;
6379 case kIemNativeLabelType_ReturnWithFlags:
6380 pszName = "ReturnWithFlags";
6381 break;
6382 case kIemNativeLabelType_NonZeroRetOrPassUp:
6383 pszName = "NonZeroRetOrPassUp";
6384 break;
6385 case kIemNativeLabelType_RaiseGp0:
6386 pszName = "RaiseGp0";
6387 break;
6388 case kIemNativeLabelType_If:
6389 pszName = "If";
6390 fNumbered = true;
6391 break;
6392 case kIemNativeLabelType_Else:
6393 pszName = "Else";
6394 fNumbered = true;
6395 pszComment = " ; regs state restored pre-if-block";
6396 break;
6397 case kIemNativeLabelType_Endif:
6398 pszName = "Endif";
6399 fNumbered = true;
6400 break;
6401 case kIemNativeLabelType_CheckIrq:
6402 pszName = "CheckIrq_CheckVM";
6403 fNumbered = true;
6404 break;
6405 case kIemNativeLabelType_Invalid:
6406 case kIemNativeLabelType_End:
6407 break;
6408 }
6409 if (fNumbered)
6410 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
6411 else
6412 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
6413 continue;
6414 }
6415
6416 case kIemTbDbgEntryType_NativeOffset:
6417 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
6418 Assert(offDbgNativeNext > offNative);
6419 break;
6420
6421 default:
6422 AssertFailed();
6423 }
6424 iDbgEntry++;
6425 break;
6426 }
6427 }
6428
6429 /*
6430 * Disassemble the next native instruction.
6431 */
6432 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6433# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6434 uint32_t cbInstr = sizeof(paNative[0]);
6435 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6436 if (RT_SUCCESS(rc))
6437 {
6438# if defined(RT_ARCH_AMD64)
6439 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6440 {
6441 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6442 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6443 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
6444 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6445 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6446 uInfo & 0x8000 ? " - recompiled" : "");
6447 else
6448 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6449 }
6450 else
6451# endif
6452 {
6453# ifdef RT_ARCH_AMD64
6454 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6455 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6456 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6457 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6458# elif defined(RT_ARCH_ARM64)
6459 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6460 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6461 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6462# else
6463# error "Port me"
6464# endif
6465 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6466 }
6467 }
6468 else
6469 {
6470# if defined(RT_ARCH_AMD64)
6471 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6472 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6473# elif defined(RT_ARCH_ARM64)
6474 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6475# else
6476# error "Port me"
6477# endif
6478 cbInstr = sizeof(paNative[0]);
6479 }
6480 offNative += cbInstr / sizeof(paNative[0]);
6481
6482# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6483 cs_insn *pInstr;
6484 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6485 (uintptr_t)pNativeCur, 1, &pInstr);
6486 if (cInstrs > 0)
6487 {
6488 Assert(cInstrs == 1);
6489# if defined(RT_ARCH_AMD64)
6490 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6491 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6492# else
6493 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6494 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6495# endif
6496 offNative += pInstr->size / sizeof(*pNativeCur);
6497 cs_free(pInstr, cInstrs);
6498 }
6499 else
6500 {
6501# if defined(RT_ARCH_AMD64)
6502 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6503                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6504# else
6505 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6506# endif
6507 offNative++;
6508 }
6509# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6510 }
6511 }
6512 else
6513#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
6514 {
6515 /*
6516 * No debug info, just disassemble the x86 code and then the native code.
6517 *
6518 * First the guest code:
6519 */
6520 for (unsigned i = 0; i < pTb->cRanges; i++)
6521 {
6522 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
6523 + (pTb->aRanges[i].idxPhysPage == 0
6524 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6525 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
6526 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6527 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
6528 unsigned off = pTb->aRanges[i].offOpcodes;
6529 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
6530 while (off < cbOpcodes)
6531 {
6532 uint32_t cbInstr = 1;
6533 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6534 &pTb->pabOpcodes[off], cbOpcodes - off,
6535 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6536 if (RT_SUCCESS(rc))
6537 {
6538 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6539 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6540 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6541 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6542 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
6543 GCPhysPc += cbInstr;
6544 off += cbInstr;
6545 }
6546 else
6547 {
6548 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
6549 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
6550 break;
6551 }
6552 }
6553 }
6554
6555 /*
6556 * Then the native code:
6557 */
6558 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
6559 while (offNative < cNative)
6560 {
6561 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6562# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6563 uint32_t cbInstr = sizeof(paNative[0]);
6564 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6565 if (RT_SUCCESS(rc))
6566 {
6567# if defined(RT_ARCH_AMD64)
6568 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6569 {
6570 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6571 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6572 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
6573 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6574 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6575 uInfo & 0x8000 ? " - recompiled" : "");
6576 else
6577 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6578 }
6579 else
6580# endif
6581 {
6582# ifdef RT_ARCH_AMD64
6583 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6584 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6585 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6586 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6587# elif defined(RT_ARCH_ARM64)
6588 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6589 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6590 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6591# else
6592# error "Port me"
6593# endif
6594 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6595 }
6596 }
6597 else
6598 {
6599# if defined(RT_ARCH_AMD64)
6600 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6601 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6602# else
6603 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6604# endif
6605 cbInstr = sizeof(paNative[0]);
6606 }
6607 offNative += cbInstr / sizeof(paNative[0]);
6608
6609# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6610 cs_insn *pInstr;
6611 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6612 (uintptr_t)pNativeCur, 1, &pInstr);
6613 if (cInstrs > 0)
6614 {
6615 Assert(cInstrs == 1);
6616# if defined(RT_ARCH_AMD64)
6617 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6618 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6619# else
6620 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6621 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6622# endif
6623 offNative += pInstr->size / sizeof(*pNativeCur);
6624 cs_free(pInstr, cInstrs);
6625 }
6626 else
6627 {
6628# if defined(RT_ARCH_AMD64)
6629 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6630                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6631# else
6632 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6633# endif
6634 offNative++;
6635 }
6636# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6637 }
6638 }
6639
6640#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6641 /* Cleanup. */
6642 cs_close(&hDisasm);
6643#endif
6644}
6645
6646
6647/**
6648 * Recompiles the given threaded TB into a native one.
6649 *
6650 * In case of failure the translation block will be returned as-is.
6651 *
6652 * @returns pTb.
6653 * @param pVCpu The cross context virtual CPU structure of the calling
6654 * thread.
6655 * @param   pTb         The threaded translation block to recompile to native.
6656 */
6657DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
6658{
6659 /*
6660 * The first time thru, we allocate the recompiler state, the other times
6661 * we just need to reset it before using it again.
6662 */
6663 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
6664 if (RT_LIKELY(pReNative))
6665 iemNativeReInit(pReNative, pTb);
6666 else
6667 {
6668 pReNative = iemNativeInit(pVCpu, pTb);
6669 AssertReturn(pReNative, pTb);
6670 }
6671
6672 /*
6673 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
6674 * for aborting if an error happens.
6675 */
6676 uint32_t cCallsLeft = pTb->Thrd.cCalls;
6677#ifdef LOG_ENABLED
6678 uint32_t const cCallsOrg = cCallsLeft;
6679#endif
6680 uint32_t off = 0;
6681 int rc = VINF_SUCCESS;
6682 IEMNATIVE_TRY_SETJMP(pReNative, rc)
6683 {
6684 /*
6685 * Emit prolog code (fixed).
6686 */
6687 off = iemNativeEmitProlog(pReNative, off);
6688
6689 /*
6690 * Convert the calls to native code.
6691 */
6692#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6693 int32_t iGstInstr = -1;
6694 uint32_t fExec = pTb->fFlags;
6695#endif
6696 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
6697 while (cCallsLeft-- > 0)
6698 {
6699 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
6700
6701 /*
6702 * Debug info and assembly markup.
6703 */
6704#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6705 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
6706 fExec = pCallEntry->auParams[0];
6707 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6708 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
6709 {
6710 if (iGstInstr < (int32_t)pTb->cInstructions)
6711 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
6712 else
6713 Assert(iGstInstr == pTb->cInstructions);
6714 iGstInstr = pCallEntry->idxInstr;
6715 }
6716 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
6717#endif
6718#if defined(VBOX_STRICT) && 1
6719 off = iemNativeEmitMarker(pReNative, off,
6720 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
6721 pCallEntry->enmFunction));
6722#endif
6723
6724 /*
6725 * Actual work.
6726 */
6727 if (pfnRecom) /** @todo stats on this. */
6728 {
6729 //STAM_COUNTER_INC()
6730 off = pfnRecom(pReNative, off, pCallEntry);
6731 }
6732 else
6733 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
6734 Assert(off <= pReNative->cInstrBufAlloc);
6735 Assert(pReNative->cCondDepth == 0);
6736
6737 /*
6738 * Advance.
6739 */
6740 pCallEntry++;
6741 }
6742
6743 /*
6744 * Emit the epilog code.
6745 */
6746 uint32_t idxReturnLabel;
6747 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
6748
6749 /*
6750 * Generate special jump labels.
6751 */
6752 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
6753 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
6754 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
6755 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
6756 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
6757 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
6758 }
6759 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
6760 {
6761 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
6762 return pTb;
6763 }
6764 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
6765 Assert(off <= pReNative->cInstrBufAlloc);
6766
6767 /*
6768     * Make sure all labels have been defined.
6769 */
6770 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
6771#ifdef VBOX_STRICT
6772 uint32_t const cLabels = pReNative->cLabels;
6773 for (uint32_t i = 0; i < cLabels; i++)
6774 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
6775#endif
6776
6777 /*
6778 * Allocate executable memory, copy over the code we've generated.
6779 */
6780 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
6781 if (pTbAllocator->pDelayedFreeHead)
6782 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
6783
6784 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
6785 AssertReturn(paFinalInstrBuf, pTb);
6786 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
6787
6788 /*
6789 * Apply fixups.
6790 */
6791 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
6792 uint32_t const cFixups = pReNative->cFixups;
6793 for (uint32_t i = 0; i < cFixups; i++)
6794 {
6795 Assert(paFixups[i].off < off);
6796 Assert(paFixups[i].idxLabel < cLabels);
6797 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
6798 switch (paFixups[i].enmType)
6799 {
6800#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6801 case kIemNativeFixupType_Rel32:
6802 Assert(paFixups[i].off + 4 <= off);
6803 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6804 continue;
6805
6806#elif defined(RT_ARCH_ARM64)
6807 case kIemNativeFixupType_RelImm26At0:
6808 {
6809 Assert(paFixups[i].off < off);
6810 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6811                Assert(offDisp >= -33554432 && offDisp < 33554432); /* +/-2^25 instructions for the 26-bit immediate. */
6812 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6813 continue;
6814 }
6815
6816 case kIemNativeFixupType_RelImm19At5:
6817 {
6818 Assert(paFixups[i].off < off);
6819 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6820 Assert(offDisp >= -262144 && offDisp < 262144);
6821 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6822 continue;
6823 }
6824
6825 case kIemNativeFixupType_RelImm14At5:
6826 {
6827 Assert(paFixups[i].off < off);
6828 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6829 Assert(offDisp >= -8192 && offDisp < 8192);
6830 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
6831 continue;
6832 }
6833
6834#endif
6835 case kIemNativeFixupType_Invalid:
6836 case kIemNativeFixupType_End:
6837 break;
6838 }
6839 AssertFailed();
6840 }
6841
6842 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
6843
6844 /*
6845 * Convert the translation block.
6846 */
6847 //RT_BREAKPOINT();
6848 RTMemFree(pTb->Thrd.paCalls);
6849 pTb->Native.paInstructions = paFinalInstrBuf;
6850 pTb->Native.cInstructions = off;
6851 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
6852#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6853    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
6854 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
6855#endif
6856
6857 Assert(pTbAllocator->cThreadedTbs > 0);
6858 pTbAllocator->cThreadedTbs -= 1;
6859 pTbAllocator->cNativeTbs += 1;
6860 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
6861
6862#ifdef LOG_ENABLED
6863 /*
6864 * Disassemble to the log if enabled.
6865 */
6866 if (LogIs3Enabled())
6867 {
6868 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
6869 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
6870 }
6871#endif
6872
6873 return pTb;
6874}
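/*
 * Summary of the flow above: emit the fixed prolog, recompile each threaded call
 * (falling back to emitting a call to the threaded function when no native
 * recompiler exists for it), emit the epilog and any special label tails, copy
 * the result into executable memory, apply the recorded fixups, and finally flip
 * the TB from threaded to native type.
 */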
6875