VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@101984

Last change on this file since 101984 was 101984, checked in by vboxsync, 15 months ago

VMM/IEM: Added a flush mask for guest register shadows to the IEM_MC_DEFER_TO_CIMPL_X_RET macros to better manage register optimizations when recompiling to native code. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 296.6 KB
1/* $Id: IEMAllN8veRecompiler.cpp 101984 2023-11-08 15:56:18Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDeleteFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMNativeFunctions.h"
102
103
104/*
105 * Narrow down configs here to avoid wasting time on unused configs.
106 * Note! Same checks in IEMAllThrdRecompiler.cpp.
107 */
108
109#ifndef IEM_WITH_CODE_TLB
110# error The code TLB must be enabled for the recompiler.
111#endif
112
113#ifndef IEM_WITH_DATA_TLB
114# error The data TLB must be enabled for the recompiler.
115#endif
116
117#ifndef IEM_WITH_SETJMP
118# error The setjmp approach must be enabled for the recompiler.
119#endif
120
121/** @todo eliminate this clang build hack. */
122#if RT_CLANG_PREREQ(4, 0)
123# pragma GCC diagnostic ignored "-Wunused-function"
124#endif
125
126
127
128/*********************************************************************************************************************************
129* Defined Constants And Macros *
130*********************************************************************************************************************************/
131/** Always count instructions for now. */
132#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
133
134
135/*********************************************************************************************************************************
136* Internal Functions *
137*********************************************************************************************************************************/
138#ifdef VBOX_STRICT
139static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
140 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
141#endif
142#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
143static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
144static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
145#endif
146
147
148/*********************************************************************************************************************************
149* Executable Memory Allocator *
150*********************************************************************************************************************************/
151/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
152 * Use an alternative chunk sub-allocator that does not store internal data
153 * in the chunk.
154 *
155 * Using the RTHeapSimple is not practical on newer darwin systems where
156 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
157 * memory. We would have to change the protection of the whole chunk for
158 * every call to RTHeapSimple, which would be rather expensive.
159 *
160 * This alternative implementation lets us restrict page protection modifications
161 * to the pages backing the executable memory we just allocated.
162 */
163#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
164/** The chunk sub-allocation unit size in bytes. */
165#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
166/** The chunk sub-allocation unit size as a shift factor. */
167#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
168
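
/*
 * Illustrative sketch (not built): how a byte request maps onto the
 * sub-allocation units defined above.  The helper name is made up; the
 * rounding formula is the one iemExecMemAllocatorAllocInChunk uses below.
 */
#if 0
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));

DECLINLINE(uint32_t) iemExecMemExampleReqToUnits(uint32_t cbReq)
{
    /* Round up to whole 128 byte units: 1 -> 1 unit, 128 -> 1 unit, 129 -> 2 units. */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif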
169#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
170# ifdef IEMNATIVE_USE_GDB_JIT
171# define IEMNATIVE_USE_GDB_JIT_ET_DYN
172
173/** GDB JIT: Code entry. */
174typedef struct GDBJITCODEENTRY
175{
176 struct GDBJITCODEENTRY *pNext;
177 struct GDBJITCODEENTRY *pPrev;
178 uint8_t *pbSymFile;
179 uint64_t cbSymFile;
180} GDBJITCODEENTRY;
181
182/** GDB JIT: Actions. */
183typedef enum GDBJITACTIONS : uint32_t
184{
185 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
186} GDBJITACTIONS;
187
188/** GDB JIT: Descriptor. */
189typedef struct GDBJITDESCRIPTOR
190{
191 uint32_t uVersion;
192 GDBJITACTIONS enmAction;
193 GDBJITCODEENTRY *pRelevant;
194 GDBJITCODEENTRY *pHead;
195 /** Our addition: */
196 GDBJITCODEENTRY *pTail;
197} GDBJITDESCRIPTOR;
198
199/** GDB JIT: Our simple symbol file data. */
200typedef struct GDBJITSYMFILE
201{
202 Elf64_Ehdr EHdr;
203# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
204 Elf64_Shdr aShdrs[5];
205# else
206 Elf64_Shdr aShdrs[7];
207 Elf64_Phdr aPhdrs[2];
208# endif
209 /** The dwarf ehframe data for the chunk. */
210 uint8_t abEhFrame[512];
211 char szzStrTab[128];
212 Elf64_Sym aSymbols[3];
213# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
214 Elf64_Sym aDynSyms[2];
215 Elf64_Dyn aDyn[6];
216# endif
217} GDBJITSYMFILE;
218
219extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
220extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
221
222/** Init once for g_IemNativeGdbJitLock. */
223static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
224/** Init once for the critical section. */
225static RTCRITSECT g_IemNativeGdbJitLock;
226
227/** GDB reads the info here. */
228GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
229
230/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
231DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
232{
233 ASMNopPause();
234}
235
236/** @callback_method_impl{FNRTONCE} */
237static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
238{
239 RT_NOREF(pvUser);
240 return RTCritSectInit(&g_IemNativeGdbJitLock);
241}
242
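/*
 * Condensed sketch (not built) of how the GDB JIT interface above is driven:
 * link an entry into the descriptor list, set the action and poke the
 * breakpoint hook.  The real code doing this, including the once-init of the
 * lock, lives in iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk below.
 */
# if 0
static void iemExecMemExampleGdbJitNotify(GDBJITCODEENTRY *pEntry)
{
    RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
    RTCritSectEnter(&g_IemNativeGdbJitLock);

    /* Append the entry to the doubly linked list GDB walks via __jit_debug_descriptor. */
    pEntry->pNext = NULL;
    pEntry->pPrev = __jit_debug_descriptor.pTail;
    if (__jit_debug_descriptor.pTail)
        __jit_debug_descriptor.pTail->pNext = pEntry;
    else
        __jit_debug_descriptor.pHead = pEntry;
    __jit_debug_descriptor.pTail     = pEntry;
    __jit_debug_descriptor.pRelevant = pEntry;

    /* Notify GDB, which has a breakpoint on __jit_debug_register_code(). */
    __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    __jit_debug_register_code();
    __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;

    RTCritSectLeave(&g_IemNativeGdbJitLock);
}
# endif
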
243
244# endif /* IEMNATIVE_USE_GDB_JIT */
245
246/**
247 * Per-chunk unwind info for non-windows hosts.
248 */
249typedef struct IEMEXECMEMCHUNKEHFRAME
250{
251# ifdef IEMNATIVE_USE_LIBUNWIND
252 /** The offset of the FDA into abEhFrame. */
253 uintptr_t offFda;
254# else
255 /** 'struct object' storage area. */
256 uint8_t abObject[1024];
257# endif
258# ifdef IEMNATIVE_USE_GDB_JIT
259# if 0
260 /** The GDB JIT 'symbol file' data. */
261 GDBJITSYMFILE GdbJitSymFile;
262# endif
263 /** The GDB JIT list entry. */
264 GDBJITCODEENTRY GdbJitEntry;
265# endif
266 /** The dwarf ehframe data for the chunk. */
267 uint8_t abEhFrame[512];
268} IEMEXECMEMCHUNKEHFRAME;
269/** Pointer to per-chunk unwind info for non-windows hosts. */
270typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
271#endif
272
273
274/**
275 * A chunk of executable memory.
276 */
277typedef struct IEMEXECMEMCHUNK
278{
279#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
280 /** Number of free items in this chunk. */
281 uint32_t cFreeUnits;
282 /** Hint where to start searching for free space in the allocation bitmap. */
283 uint32_t idxFreeHint;
284#else
285 /** The heap handle. */
286 RTHEAPSIMPLE hHeap;
287#endif
288 /** Pointer to the chunk. */
289 void *pvChunk;
290#ifdef IN_RING3
291 /**
292 * Pointer to the unwind information.
293 *
294 * This is used during C++ throw and longjmp (windows and probably most other
295 * platforms). Some debuggers (windbg) make use of it as well.
296 *
297 * Windows: This is allocated from hHeap on windows because (at least for
298 * AMD64) the UNWIND_INFO structure address in the
299 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
300 *
301 * Others: Allocated from the regular heap to avoid unnecessary executable data
302 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
303 void *pvUnwindInfo;
304#elif defined(IN_RING0)
305 /** Allocation handle. */
306 RTR0MEMOBJ hMemObj;
307#endif
308} IEMEXECMEMCHUNK;
309/** Pointer to a memory chunk. */
310typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
311
312
313/**
314 * Executable memory allocator for the native recompiler.
315 */
316typedef struct IEMEXECMEMALLOCATOR
317{
318 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
319 uint32_t uMagic;
320
321 /** The chunk size. */
322 uint32_t cbChunk;
323 /** The maximum number of chunks. */
324 uint32_t cMaxChunks;
325 /** The current number of chunks. */
326 uint32_t cChunks;
327 /** Hint where to start looking for available memory. */
328 uint32_t idxChunkHint;
329 /** Statistics: Current number of allocations. */
330 uint32_t cAllocations;
331
332 /** The total amount of memory available. */
333 uint64_t cbTotal;
334 /** Total amount of free memory. */
335 uint64_t cbFree;
336 /** Total amount of memory allocated. */
337 uint64_t cbAllocated;
338
339#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
340 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
341 *
342 * Since the chunk size is a power of two and the minimum chunk size is a lot
343 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
344 * require a whole number of uint64_t elements in the allocation bitmap. So,
345 * for the sake of simplicity/laziness, they are allocated as one
346 * continuous chunk. */
347 uint64_t *pbmAlloc;
348 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
349 uint32_t cUnitsPerChunk;
350 /** Number of bitmap elements per chunk (for quickly locating the bitmap
351 * portion corresponding to a chunk).
352 uint32_t cBitmapElementsPerChunk;
353#else
354 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
355 * @{ */
356 /** The size of the heap internal block header. This is used to adjust the
357 * requested memory size to make sure there is exactly enough room for a header at
358 * the end of the blocks we allocate before the next 64 byte alignment line. */
359 uint32_t cbHeapBlockHdr;
360 /** The size of the initial heap allocation required to make sure the first
361 * allocation is correctly aligned. */
362 uint32_t cbHeapAlignTweak;
363 /** The alignment tweak allocation address. */
364 void *pvAlignTweak;
365 /** @} */
366#endif
367
368#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
369 /** Pointer to the array of unwind info running parallel to aChunks (same
370 * allocation as this structure, located after the bitmaps).
371 * (For Windows, the structures must reside in 32-bit RVA distance to the
372 * actual chunk, so they are allocated off the chunk.) */
373 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
374#endif
375
376 /** The allocation chunks. */
377 RT_FLEXIBLE_ARRAY_EXTENSION
378 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
379} IEMEXECMEMALLOCATOR;
380/** Pointer to an executable memory allocator. */
381typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
382
383/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
384#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
385
386
387static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
388
389
390/**
391 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
392 * the heap statistics.
393 */
394static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
395 uint32_t cbReq, uint32_t idxChunk)
396{
397 pExecMemAllocator->cAllocations += 1;
398 pExecMemAllocator->cbAllocated += cbReq;
399#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
400 pExecMemAllocator->cbFree -= cbReq;
401#else
402 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
403#endif
404 pExecMemAllocator->idxChunkHint = idxChunk;
405
406#ifdef RT_OS_DARWIN
407 /*
408 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
409 * on darwin. So, we mark the pages returned as read+write after alloc and
410 * expect the caller to call iemExecMemAllocatorReadyForUse when done
411 * writing to the allocation.
412 *
413 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
414 * for details.
415 */
416 /** @todo detect if this is necessary... it wasn't required on 10.15 or
417 * whatever older version it was. */
418 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
419 AssertRC(rc);
420#endif
421
422 return pvRet;
423}
424
425
426#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
427static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
428 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
429{
430 /*
431 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
432 */
433 Assert(!(cToScan & 63));
434 Assert(!(idxFirst & 63));
435 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
436 pbmAlloc += idxFirst / 64;
437
438 /*
439 * Scan the bitmap for cReqUnits of consecutive clear bits.
440 */
441 /** @todo This can probably be done more efficiently for non-x86 systems. */
442 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
443 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
444 {
445 uint32_t idxAddBit = 1;
446 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
447 idxAddBit++;
448 if (idxAddBit >= cReqUnits)
449 {
450 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
451
452 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
453 pChunk->cFreeUnits -= cReqUnits;
454 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
455
456 void * const pvRet = (uint8_t *)pChunk->pvChunk
457 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
458
459 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
460 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
461 }
462
463 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
464 }
465 return NULL;
466}
467#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
468
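/*
 * Illustrative sketch (not built) of the unit-index/address relationship the
 * scan above and iemExecMemAllocatorFree below rely on; the function name is
 * made up, the shift is the real one.
 */
#if 0
static void iemExecMemExampleUnitAddressMath(void *pvChunk)
{
    /* An allocation starting at absolute unit index 42 within the chunk: */
    uint32_t const  idxUnit = 42;
    void * const    pv      = (uint8_t *)pvChunk + ((uintptr_t)idxUnit << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);

    /* The free path recovers the unit index from the byte offset the same way: */
    uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pvChunk;
    Assert((uint32_t)(offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) == idxUnit);
}
#endif
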
469
470static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
471{
472#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
473 /*
474 * Figure out how much to allocate.
475 */
476 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
477 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
478 {
479 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
480 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
481 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
482 {
483 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
484 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
485 if (pvRet)
486 return pvRet;
487 }
488 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
489 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
490 cReqUnits, idxChunk);
491 }
492#else
493 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
494 if (pvRet)
495 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
496#endif
497 return NULL;
498
499}
500
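/*
 * Worked example (not built) of the free-hint handling above: the hint is
 * rounded down to a 64-bit bitmap word boundary for the first scan, and the
 * wrap-around scan only needs to cover the area in front of it.
 */
#if 0
static void iemExecMemExampleHintMath(void)
{
    /* Say the chunk has 0x20000 units, the free hint is unit 70 and we need 3 units. */
    uint32_t const cUnitsPerChunk = 0x20000;
    uint32_t const idxFreeHint    = 70;
    uint32_t const cReqUnits      = 3;

    uint32_t const idxHint = idxFreeHint & ~(uint32_t)63;   /* = 64, word aligned */
    Assert(idxHint == 64);

    /* 1st pass scans [64, 0x20000); 2nd pass (on failure) scans [0, 128). */
    Assert(RT_MIN(cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)) == 128);
}
#endif
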
501
502/**
503 * Allocates @a cbReq bytes of executable memory.
504 *
505 * @returns Pointer to the memory, NULL if out of memory or other problem
506 * encountered.
507 * @param pVCpu The cross context virtual CPU structure of the calling
508 * thread.
509 * @param cbReq How many bytes are required.
510 */
511static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
512{
513 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
514 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
515 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
516
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means to follow the logic described
521 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * While for the alternative one we just align it up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /* What now? Prune native translation blocks from the cache? */
565 AssertFailed();
566 return NULL;
567}
568
569
570/** This is a hook that we may need later for changing memory protection back
571 * to readonly+exec */
572static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
573{
574#ifdef RT_OS_DARWIN
575 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
576 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
577 AssertRC(rc); RT_NOREF(pVCpu);
578
579 /*
580 * Flush the instruction cache:
581 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
582 */
583 /* sys_dcache_flush(pv, cb); - not necessary */
584 sys_icache_invalidate(pv, cb);
585#else
586 RT_NOREF(pVCpu, pv, cb);
587#endif
588}
589
590
591/**
592 * Frees executable memory.
593 */
594void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
595{
596 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
597 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
598 Assert(pv);
599#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
600 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
601#else
602 Assert(!((uintptr_t)pv & 63));
603#endif
604
605 /* Align the size as we did when allocating the block. */
606#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
607 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
608#else
609 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
610#endif
611
612 /* Free it / assert sanity. */
613#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
614 uint32_t const cChunks = pExecMemAllocator->cChunks;
615 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
616 bool fFound = false;
617 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
618 {
619 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
620 fFound = offChunk < cbChunk;
621 if (fFound)
622 {
623#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
624 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
625 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
626
627 /* Check that it's valid and free it. */
628 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
629 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
630 for (uint32_t i = 1; i < cReqUnits; i++)
631 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
632 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
633
634 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
635 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
636
637 /* Update the stats. */
638 pExecMemAllocator->cbAllocated -= cb;
639 pExecMemAllocator->cbFree += cb;
640 pExecMemAllocator->cAllocations -= 1;
641 return;
642#else
643 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
644 break;
645#endif
646 }
647 }
648# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
649 AssertFailed();
650# else
651 Assert(fFound);
652# endif
653#endif
654
655#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
656 /* Update stats while cb is freshly calculated. */
657 pExecMemAllocator->cbAllocated -= cb;
658 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
659 pExecMemAllocator->cAllocations -= 1;
660
661 /* Free it. */
662 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
663#endif
664}
665
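/*
 * End-to-end usage sketch (not built) for the allocator API in this file; the
 * emitter function and code buffer are made up.  Note that the same size must
 * be passed to iemExecMemAllocatorFree as was allocated, since the alternative
 * sub-allocator derives the unit count from cb instead of storing it, and that
 * the request must be larger than 32 bytes and smaller than 512KB (see the
 * assertion in iemExecMemAllocatorAlloc).
 */
#if 0
static int iemExecMemExampleEmit(PVMCPU pVCpu, uint8_t const *pbCode, uint32_t cbCode)
{
    void * const pvExec = iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (!pvExec)
        return VERR_NO_EXEC_MEMORY;

    memcpy(pvExec, pbCode, cbCode);                        /* pages are still writable here (darwin: RW, not X) */
    iemExecMemAllocatorReadyForUse(pVCpu, pvExec, cbCode); /* darwin: flip to RX and flush the icache */

    /* ... execute the code ... */

    iemExecMemAllocatorFree(pVCpu, pvExec, cbCode);        /* same cbCode as allocated */
    return VINF_SUCCESS;
}
#endif
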
666
667
668#ifdef IN_RING3
669# ifdef RT_OS_WINDOWS
670
671/**
672 * Initializes the unwind info structures for windows hosts.
673 */
674static int
675iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
676 void *pvChunk, uint32_t idxChunk)
677{
678 RT_NOREF(pVCpu);
679
680 /*
681 * The AMD64 unwind opcodes.
682 *
683 * This is a program that starts with RSP after a RET instruction that
684 * ends up in recompiled code, and the operations we describe here will
685 * restore all non-volatile registers and bring RSP back to where our
686 * RET address is. This means it's reverse order from what happens in
687 * the prologue.
688 *
689 * Note! Using a frame register approach here both because we have one
690 * and mainly because the UWOP_ALLOC_LARGE argument values
691 * would be a pain to write initializers for. On the positive
692 * side, we're impervious to changes in the stack variable
693 * area and can deal with dynamic stack allocations if necessary.
694 */
695 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
696 {
697 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
698 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
699 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
700 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
701 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
702 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
703 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
704 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
705 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
706 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
707 };
708 union
709 {
710 IMAGE_UNWIND_INFO Info;
711 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
712 } s_UnwindInfo =
713 {
714 {
715 /* .Version = */ 1,
716 /* .Flags = */ 0,
717 /* .SizeOfProlog = */ 16, /* whatever */
718 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
719 /* .FrameRegister = */ X86_GREG_xBP,
720 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
721 }
722 };
723 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
724 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
725
726 /*
727 * Calc how much space we need and allocate it off the exec heap.
728 */
729 unsigned const cFunctionEntries = 1;
730 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
731 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
732# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
733 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
734 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
735 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
736# else
737 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
738 - pExecMemAllocator->cbHeapBlockHdr;
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
740 32 /*cbAlignment*/);
741# endif
742 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
743 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
744
745 /*
746 * Initialize the structures.
747 */
748 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
749
750 paFunctions[0].BeginAddress = 0;
751 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
752 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
753
754 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
755 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
756
757 /*
758 * Register it.
759 */
760 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
761 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
762
763 return VINF_SUCCESS;
764}
765
766
767# else /* !RT_OS_WINDOWS */
768
769/**
770 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
771 */
772DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
773{
774 if (iValue >= 64)
775 {
776 Assert(iValue < 0x2000);
777 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
778 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
779 }
780 else if (iValue >= 0)
781 *Ptr.pb++ = (uint8_t)iValue;
782 else if (iValue > -64)
783 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
784 else
785 {
786 Assert(iValue > -0x2000);
787 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
788 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
789 }
790 return Ptr;
791}
792
793
794/**
795 * Emits an ULEB128 encoded value (up to 64-bit wide).
796 */
797DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
798{
799 while (uValue >= 0x80)
800 {
801 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
802 uValue >>= 7;
803 }
804 *Ptr.pb++ = (uint8_t)uValue;
805 return Ptr;
806}
807
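/*
 * A couple of concrete encodings as a sanity check for the two emitters above
 * (values can be verified against the DWARF spec): the data alignment factor
 * -8 used below encodes to the single byte 0x78, and 300 as ULEB128 becomes
 * 0xac 0x02.  Not built, just illustration.
 */
# if 0
static void iemDwarfExampleLeb128(void)
{
    uint8_t abBuf[8];
    RTPTRUNION Ptr = { abBuf };

    Ptr = iemDwarfPutLeb128(Ptr, -8);                      /* SLEB128(-8)  -> 0x78 */
    Assert(abBuf[0] == 0x78 && Ptr.pb == &abBuf[1]);

    Ptr.pb = abBuf;
    Ptr = iemDwarfPutUleb128(Ptr, 300);                    /* ULEB128(300) -> 0xac 0x02 */
    Assert(abBuf[0] == 0xac && abBuf[1] == 0x02 && Ptr.pb == &abBuf[2]);
}
# endif
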
808
809/**
810 * Emits a CFA rule as register @a uReg + offset @a off.
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
813{
814 *Ptr.pb++ = DW_CFA_def_cfa;
815 Ptr = iemDwarfPutUleb128(Ptr, uReg);
816 Ptr = iemDwarfPutUleb128(Ptr, off);
817 return Ptr;
818}
819
820
821/**
822 * Emits a register (@a uReg) save location:
823 * CFA + @a off * data_alignment_factor
824 */
825DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
826{
827 if (uReg < 0x40)
828 *Ptr.pb++ = DW_CFA_offset | uReg;
829 else
830 {
831 *Ptr.pb++ = DW_CFA_offset_extended;
832 Ptr = iemDwarfPutUleb128(Ptr, uReg);
833 }
834 Ptr = iemDwarfPutUleb128(Ptr, off);
835 return Ptr;
836}
837
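/*
 * What the helpers above emit, using the CIE generated below as the example
 * (data alignment factor -8).  Assuming DWREG_AMD64_RBP carries the standard
 * DWARF register number 6, the two calls produce 0x0c 0x06 0x10
 * ("CFA = RBP + 16") and 0x86 0x02 ("RBP was saved at CFA + 2 * -8 = CFA - 16").
 * Not built, just illustration.
 */
# if 0
static void iemDwarfExampleCfi(void)
{
    uint8_t abBuf[8];
    RTPTRUNION Ptr = { abBuf };

    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);  /* DW_CFA_def_cfa + two ULEB128 operands */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2);   /* DW_CFA_offset | reg, then ULEB128 factor */

    Assert(Ptr.pb == &abBuf[5]);
}
# endif
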
838
839# if 0 /* unused */
840/**
841 * Emits a register (@a uReg) save location, using signed offset:
842 * CFA + @a offSigned * data_alignment_factor
843 */
844DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
845{
846 *Ptr.pb++ = DW_CFA_offset_extended_sf;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
849 return Ptr;
850}
851# endif
852
853
854/**
855 * Initializes the unwind info section for non-windows hosts.
856 */
857static int
858iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
859 void *pvChunk, uint32_t idxChunk)
860{
861 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
862 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
863
864 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
865
866 /*
867 * Generate the CIE first.
868 */
869# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
870 uint8_t const iDwarfVer = 3;
871# else
872 uint8_t const iDwarfVer = 4;
873# endif
874 RTPTRUNION const PtrCie = Ptr;
875 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
876 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
877 *Ptr.pb++ = iDwarfVer; /* DWARF version */
878 *Ptr.pb++ = 0; /* Augmentation. */
879 if (iDwarfVer >= 4)
880 {
881 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
882 *Ptr.pb++ = 0; /* Segment selector size. */
883 }
884# ifdef RT_ARCH_AMD64
885 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
886# else
887 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
888# endif
889 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
890# ifdef RT_ARCH_AMD64
891 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
892# elif defined(RT_ARCH_ARM64)
893 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
894# else
895# error "port me"
896# endif
897 /* Initial instructions: */
898# ifdef RT_ARCH_AMD64
899 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
907# elif defined(RT_ARCH_ARM64)
908# if 1
909 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
910# else
911 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
912# endif
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
925 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
926 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
927# else
928# error "port me"
929# endif
930 while ((Ptr.u - PtrCie.u) & 3)
931 *Ptr.pb++ = DW_CFA_nop;
932 /* Finalize the CIE size. */
933 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
934
935 /*
936 * Generate an FDE for the whole chunk area.
937 */
938# ifdef IEMNATIVE_USE_LIBUNWIND
939 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
940# endif
941 RTPTRUNION const PtrFde = Ptr;
942 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
943 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
944 Ptr.pu32++;
945 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
946 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
947# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
948 *Ptr.pb++ = DW_CFA_nop;
949# endif
950 while ((Ptr.u - PtrFde.u) & 3)
951 *Ptr.pb++ = DW_CFA_nop;
952 /* Finalize the FDE size. */
953 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
954
955 /* Terminator entry. */
956 *Ptr.pu32++ = 0;
957 *Ptr.pu32++ = 0; /* just to be sure... */
958 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
959
960 /*
961 * Register it.
962 */
963# ifdef IEMNATIVE_USE_LIBUNWIND
964 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
965# else
966 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
967 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
968# endif
969
970# ifdef IEMNATIVE_USE_GDB_JIT
971 /*
972 * Now for telling GDB about this (experimental).
973 *
974 * This seems to work best with ET_DYN.
975 */
976 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
977# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
978 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
979 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
980# else
981 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
982 - pExecMemAllocator->cbHeapBlockHdr;
983 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
984# endif
985 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
986 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
987
988 RT_ZERO(*pSymFile);
989
990 /*
991 * The ELF header:
992 */
993 pSymFile->EHdr.e_ident[0] = ELFMAG0;
994 pSymFile->EHdr.e_ident[1] = ELFMAG1;
995 pSymFile->EHdr.e_ident[2] = ELFMAG2;
996 pSymFile->EHdr.e_ident[3] = ELFMAG3;
997 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
998 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
999 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1000 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1001# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1002 pSymFile->EHdr.e_type = ET_DYN;
1003# else
1004 pSymFile->EHdr.e_type = ET_REL;
1005# endif
1006# ifdef RT_ARCH_AMD64
1007 pSymFile->EHdr.e_machine = EM_AMD64;
1008# elif defined(RT_ARCH_ARM64)
1009 pSymFile->EHdr.e_machine = EM_AARCH64;
1010# else
1011# error "port me"
1012# endif
1013 pSymFile->EHdr.e_version = 1; /*?*/
1014 pSymFile->EHdr.e_entry = 0;
1015# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1016 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1017# else
1018 pSymFile->EHdr.e_phoff = 0;
1019# endif
1020 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1021 pSymFile->EHdr.e_flags = 0;
1022 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1023# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1024 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1025 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phentsize = 0;
1028 pSymFile->EHdr.e_phnum = 0;
1029# endif
1030 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1031 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1032 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1033
1034 uint32_t offStrTab = 0;
1035#define APPEND_STR(a_szStr) do { \
1036 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1037 offStrTab += sizeof(a_szStr); \
1038 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1039 } while (0)
1040#define APPEND_STR_FMT(a_szStr, ...) do { \
1041 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1042 offStrTab++; \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045
1046 /*
1047 * Section headers.
1048 */
1049 /* Section header #0: NULL */
1050 unsigned i = 0;
1051 APPEND_STR("");
1052 RT_ZERO(pSymFile->aShdrs[i]);
1053 i++;
1054
1055 /* Section header: .eh_frame */
1056 pSymFile->aShdrs[i].sh_name = offStrTab;
1057 APPEND_STR(".eh_frame");
1058 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1059 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1060# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1061 pSymFile->aShdrs[i].sh_offset
1062 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1063# else
1064 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1065 pSymFile->aShdrs[i].sh_offset = 0;
1066# endif
1067
1068 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1069 pSymFile->aShdrs[i].sh_link = 0;
1070 pSymFile->aShdrs[i].sh_info = 0;
1071 pSymFile->aShdrs[i].sh_addralign = 1;
1072 pSymFile->aShdrs[i].sh_entsize = 0;
1073 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1074 i++;
1075
1076 /* Section header: .shstrtab */
1077 unsigned const iShStrTab = i;
1078 pSymFile->EHdr.e_shstrndx = iShStrTab;
1079 pSymFile->aShdrs[i].sh_name = offStrTab;
1080 APPEND_STR(".shstrtab");
1081 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1082 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1083# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1084 pSymFile->aShdrs[i].sh_offset
1085 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1086# else
1087 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1088 pSymFile->aShdrs[i].sh_offset = 0;
1089# endif
1090 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1091 pSymFile->aShdrs[i].sh_link = 0;
1092 pSymFile->aShdrs[i].sh_info = 0;
1093 pSymFile->aShdrs[i].sh_addralign = 1;
1094 pSymFile->aShdrs[i].sh_entsize = 0;
1095 i++;
1096
1097 /* Section header: .symtab */
1098 pSymFile->aShdrs[i].sh_name = offStrTab;
1099 APPEND_STR(".symtab");
1100 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1101 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1102 pSymFile->aShdrs[i].sh_offset
1103 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1104 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1105 pSymFile->aShdrs[i].sh_link = iShStrTab;
1106 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1107 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1108 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1109 i++;
1110
1111# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1112 /* Section header: .dynsym */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".dynsym");
1115 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1124 i++;
1125# endif
1126
1127# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1128 /* Section header: .dynamic */
1129 pSymFile->aShdrs[i].sh_name = offStrTab;
1130 APPEND_STR(".dynamic");
1131 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1132 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1133 pSymFile->aShdrs[i].sh_offset
1134 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1135 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1136 pSymFile->aShdrs[i].sh_link = iShStrTab;
1137 pSymFile->aShdrs[i].sh_info = 0;
1138 pSymFile->aShdrs[i].sh_addralign = 1;
1139 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1140 i++;
1141# endif
1142
1143 /* Section header: .text */
1144 unsigned const iShText = i;
1145 pSymFile->aShdrs[i].sh_name = offStrTab;
1146 APPEND_STR(".text");
1147 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1148 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1149# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1150 pSymFile->aShdrs[i].sh_offset
1151 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1152# else
1153 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1154 pSymFile->aShdrs[i].sh_offset = 0;
1155# endif
1156 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1157 pSymFile->aShdrs[i].sh_link = 0;
1158 pSymFile->aShdrs[i].sh_info = 0;
1159 pSymFile->aShdrs[i].sh_addralign = 1;
1160 pSymFile->aShdrs[i].sh_entsize = 0;
1161 i++;
1162
1163 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1164
1165# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1166 /*
1167 * The program headers:
1168 */
1169 /* Everything in a single LOAD segment: */
1170 i = 0;
1171 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1172 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1173 pSymFile->aPhdrs[i].p_offset
1174 = pSymFile->aPhdrs[i].p_vaddr
1175 = pSymFile->aPhdrs[i].p_paddr = 0;
1176 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1177 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1178 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1179 i++;
1180 /* The .dynamic segment. */
1181 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1182 pSymFile->aPhdrs[i].p_flags = PF_R;
1183 pSymFile->aPhdrs[i].p_offset
1184 = pSymFile->aPhdrs[i].p_vaddr
1185 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1186 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1187 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1188 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1189 i++;
1190
1191 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1192
1193 /*
1194 * The dynamic section:
1195 */
1196 i = 0;
1197 pSymFile->aDyn[i].d_tag = DT_SONAME;
1198 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1199 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1200 i++;
1201 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1202 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1205 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1206 i++;
1207 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1208 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1211 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_NULL;
1214 i++;
1215 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1216# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1217
1218 /*
1219 * Symbol tables:
1220 */
1221 /** @todo gdb doesn't seem to really like this ... */
1222 i = 0;
1223 pSymFile->aSymbols[i].st_name = 0;
1224 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1225 pSymFile->aSymbols[i].st_value = 0;
1226 pSymFile->aSymbols[i].st_size = 0;
1227 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1228 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1229# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1230 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1231# endif
1232 i++;
1233
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240 i++;
1241
1242 pSymFile->aSymbols[i].st_name = offStrTab;
1243 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1244# if 0
1245 pSymFile->aSymbols[i].st_shndx = iShText;
1246 pSymFile->aSymbols[i].st_value = 0;
1247# else
1248 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1249 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1250# endif
1251 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1252 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1253 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1254# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1255 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1256 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1257# endif
1258 i++;
1259
1260 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1261 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1262
1263 /*
1264 * The GDB JIT entry and informing GDB.
1265 */
1266 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1267# if 1
1268 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1269# else
1270 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1271# endif
1272
1273 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1274 RTCritSectEnter(&g_IemNativeGdbJitLock);
1275 pEhFrame->GdbJitEntry.pNext = NULL;
1276 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1277 if (__jit_debug_descriptor.pTail)
1278 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1279 else
1280 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1281 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1282 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1283
1284 /* Notify GDB: */
1285 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1286 __jit_debug_register_code();
1287 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1288 RTCritSectLeave(&g_IemNativeGdbJitLock);
1289
1290# else /* !IEMNATIVE_USE_GDB_JIT */
1291 RT_NOREF(pVCpu);
1292# endif /* !IEMNATIVE_USE_GDB_JIT */
1293
1294 return VINF_SUCCESS;
1295}
1296
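/*
 * For reference, a rough map of what the routine above leaves in
 * pEhFrame->abEhFrame (sizes vary with architecture and DWARF version, so the
 * offsets are indicative only):
 *
 *   +0      uint32_t    CIE length (excluding this field)
 *   +4      uint32_t    0 = "this is a CIE" in .eh_frame speak
 *   ...                 CIE body: version, empty augmentation, code/data
 *                       alignment factors, return address column, initial
 *                       CFA and register rules, DW_CFA_nop padding to 4 bytes
 *   +N      uint32_t    FDE length (excluding this field)
 *   +N+4    uint32_t    self-relative back-pointer to the CIE
 *   +N+8    uint64_t    absolute start address of the chunk
 *   +N+16   uint64_t    size of the chunk
 *   ...                 DW_CFA_nop padding to 4 bytes
 *   last    2x uint32_t zero terminator
 */
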
1297# endif /* !RT_OS_WINDOWS */
1298#endif /* IN_RING3 */
1299
1300
1301/**
1302 * Adds another chunk to the executable memory allocator.
1303 *
1304 * This is used by the init code for the initial allocation and later by the
1305 * regular allocator function when it's out of memory.
1306 */
1307static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1308{
1309 /* Check that we've room for growth. */
1310 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1311 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1312
1313 /* Allocate a chunk. */
1314#ifdef RT_OS_DARWIN
1315 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1316#else
1317 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1318#endif
1319 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1320
1321#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1322 int rc = VINF_SUCCESS;
1323#else
1324 /* Initialize the heap for the chunk. */
1325 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1326 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1327 AssertRC(rc);
1328 if (RT_SUCCESS(rc))
1329 {
1330 /*
1331 * We want the memory to be aligned on 64 byte, so the first time thru
1332 * here we do some exploratory allocations to see how we can achieve this.
1333 * On subsequent runs we only make an initial adjustment allocation, if
1334 * necessary.
1335 *
1336 * Since we own the heap implementation, we know that the internal block
1337 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1338 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1339 * to the size, align up by 64 bytes, and subtract 32 bytes.
1340 *
1341 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1342 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1343 * allocation to force subsequent allocations to return 64 byte aligned
1344 * user areas.
1345 */
1346 if (!pExecMemAllocator->cbHeapBlockHdr)
1347 {
1348 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1349 pExecMemAllocator->cbHeapAlignTweak = 64;
1350 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1351 32 /*cbAlignment*/);
1352 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1353
1354 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1355 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1356 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1357 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1358 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1359
1360 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1361 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1362 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1363 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1364 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1365
1366 RTHeapSimpleFree(hHeap, pvTest2);
1367 RTHeapSimpleFree(hHeap, pvTest1);
1368 }
1369 else
1370 {
1371 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1372 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1373 }
1374 if (RT_SUCCESS(rc))
1375#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1376 {
1377 /*
1378 * Add the chunk.
1379 *
1380 * This must be done before the unwind init so windows can allocate
1381 * memory from the chunk when using the alternative sub-allocator.
1382 */
1383 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1384#ifdef IN_RING3
1385 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1386#endif
1387#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1388 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1389#else
1390 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1391 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1392 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1393 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1394#endif
1395
1396 pExecMemAllocator->cChunks = idxChunk + 1;
1397 pExecMemAllocator->idxChunkHint = idxChunk;
1398
1399#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1400 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1401 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1402#else
1403 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1404 pExecMemAllocator->cbTotal += cbFree;
1405 pExecMemAllocator->cbFree += cbFree;
1406#endif
1407
1408#ifdef IN_RING3
1409 /*
1410 * Initialize the unwind information (this cannot really fail atm).
1411 * (This sets pvUnwindInfo.)
1412 */
1413 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1414 if (RT_SUCCESS(rc))
1415#endif
1416 {
1417 return VINF_SUCCESS;
1418 }
1419
1420#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1421 /* Just in case the impossible happens, undo the above: */
1422 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1423 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1424 pExecMemAllocator->cChunks = idxChunk;
1425 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1426 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1427 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1428 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1429#endif
1430 }
1431#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 }
1433#endif
1434 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1435 RT_NOREF(pVCpu);
1436 return rc;
1437}
1438
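/*
 * The "add header, align, subtract header" adjustment described in the big
 * comment above, spelled out for one concrete request (RTHeapSimple
 * configuration only; the 32 byte header size is the one assumed there).
 * Not built, illustration only.
 */
#if 0
static void iemExecMemExampleHeapSizeAdjust(void)
{
    uint32_t const cbHeapBlockHdr = 32;      /* sizeof(void *) * 4, see RTHEAPSIMPLEBLOCK */
    uint32_t const cbReq          = 256;

    /* 256 + 32 = 288, aligned up to 320, minus the header = 288 bytes asked of the
       heap, so the next block header ends exactly on the next 64 byte boundary. */
    uint32_t const cbAdjusted = RT_ALIGN_32(cbReq + cbHeapBlockHdr, 64) - cbHeapBlockHdr;
    Assert(cbAdjusted == 288);
    Assert((cbAdjusted + cbHeapBlockHdr) % 64 == 0);
}
#endif
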
1439
1440/**
1441 * Initializes the executable memory allocator for native recompilation on the
1442 * calling EMT.
1443 *
1444 * @returns VBox status code.
1445 * @param pVCpu The cross context virtual CPU structure of the calling
1446 * thread.
1447 * @param cbMax The max size of the allocator.
1448 * @param cbInitial The initial allocator size.
1449 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1450 * dependent).
1451 */
1452int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1453{
1454 /*
1455 * Validate input.
1456 */
1457 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1458 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1459 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1460 || cbChunk == 0
1461 || ( RT_IS_POWER_OF_TWO(cbChunk)
1462 && cbChunk >= _1M
1463 && cbChunk <= _256M
1464 && cbChunk <= cbMax),
1465 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1466 VERR_OUT_OF_RANGE);
1467
1468 /*
1469 * Adjust/figure out the chunk size.
1470 */
1471 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1472 {
1473 if (cbMax >= _256M)
1474 cbChunk = _64M;
1475 else
1476 {
1477 if (cbMax < _16M)
1478 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1479 else
1480 cbChunk = (uint32_t)cbMax / 4;
1481 if (!RT_IS_POWER_OF_TWO(cbChunk))
1482 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1483 }
1484 }
1485
1486 if (cbChunk > cbMax)
1487 cbMax = cbChunk;
1488 else
1489 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1490 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1491 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1492
1493 /*
1494 * Allocate and initialize the allocator instance.
1495 */
1496 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1497#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1498 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1499 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1500 cbNeeded = offBitmaps + cbBitmap * cMaxChunks;
1501 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1502 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1503#endif
1504#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1505 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1506 cbNeeded = offEhFrames + sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1507#endif
1508 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1509 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1510 VERR_NO_MEMORY);
1511 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1512 pExecMemAllocator->cbChunk = cbChunk;
1513 pExecMemAllocator->cMaxChunks = cMaxChunks;
1514 pExecMemAllocator->cChunks = 0;
1515 pExecMemAllocator->idxChunkHint = 0;
1516 pExecMemAllocator->cAllocations = 0;
1517 pExecMemAllocator->cbTotal = 0;
1518 pExecMemAllocator->cbFree = 0;
1519 pExecMemAllocator->cbAllocated = 0;
1520#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1521 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1522 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1523 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1524 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1525#endif
1526#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1527 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1528#endif
1529 for (uint32_t i = 0; i < cMaxChunks; i++)
1530 {
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1533 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1534#else
1535 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1536#endif
1537 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1538#ifdef IN_RING0
1539 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1540#else
1541 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1542#endif
1543 }
1544 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1545
1546 /*
1547 * Do the initial allocations.
1548 */
1549 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1550 {
1551 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1552 AssertLogRelRCReturn(rc, rc);
1553 }
1554
1555 pExecMemAllocator->idxChunkHint = 0;
1556
1557 return VINF_SUCCESS;
1558}
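

/*
 * Illustrative sketch: how an EMT might set up the executable memory allocator
 * with a 64 MiB cap, a 16 MiB initial allocation and the default chunk size.
 * Not part of the original file; disabled, for documentation only.
 */
#if 0
static int iemExecMemAllocatorInitExample(PVMCPU pVCpu)
{
    /* Passing 0 (or UINT32_MAX) for cbChunk lets iemExecMemAllocatorInit derive
       the chunk size from cbMax; here that yields _16M chunks. */
    return iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0 /*cbChunk*/);
}
#endif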
1559
1560
1561/*********************************************************************************************************************************
1562* Native Recompilation *
1563*********************************************************************************************************************************/
1564
1565
1566/**
1567 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1568 */
1569IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1570{
1571 pVCpu->iem.s.cInstructions += idxInstr;
1572 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1573}
1574
1575
1576/**
1577 * Used by TB code when it wants to raise a \#GP(0).
1578 */
1579IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1580{
1581 pVCpu->iem.s.cInstructions += idxInstr;
1582 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1583#ifndef _MSC_VER
1584 return VINF_IEM_RAISED_XCPT; /* not reached */
1585#endif
1586}
1587
1588
1589/**
1590 * Reinitializes the native recompiler state.
1591 *
1592 * Called before starting a new recompile job.
1593 */
1594static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1595{
1596 pReNative->cLabels = 0;
1597 pReNative->bmLabelTypes = 0;
1598 pReNative->cFixups = 0;
1599#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1600 pReNative->pDbgInfo->cEntries = 0;
1601#endif
1602 pReNative->pTbOrg = pTb;
1603 pReNative->cCondDepth = 0;
1604 pReNative->uCondSeqNo = 0;
1605 pReNative->uCheckIrqSeqNo = 0;
1606
1607 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1608#if IEMNATIVE_HST_GREG_COUNT < 32
1609 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1610#endif
1611 ;
1612 pReNative->Core.bmHstRegsWithGstShadow = 0;
1613 pReNative->Core.bmGstRegShadows = 0;
1614 pReNative->Core.bmVars = 0;
1615 pReNative->Core.bmStack = 0;
1616 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1617 pReNative->Core.u64ArgVars = UINT64_MAX;
1618
1619 /* Full host register reinit: */
1620 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1621 {
1622 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1623 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1624 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1625 }
1626
1627 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1628 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1629#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1630 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1631#endif
1632#ifdef IEMNATIVE_REG_FIXED_TMP0
1633 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1634#endif
1635 );
1636 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1637 {
1638 fRegs &= ~RT_BIT_32(idxReg);
1639 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1640 }
1641
1642 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1643#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1644 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1645#endif
1646#ifdef IEMNATIVE_REG_FIXED_TMP0
1647 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1648#endif
1649 return pReNative;
1650}
1651
1652
1653/**
1654 * Allocates and initializes the native recompiler state.
1655 *
1656 * This is called the first time an EMT wants to recompile something.
1657 *
1658 * @returns Pointer to the new recompiler state.
1659 * @param pVCpu The cross context virtual CPU structure of the calling
1660 * thread.
1661 * @param pTb The TB that's about to be recompiled.
1662 * @thread EMT(pVCpu)
1663 */
1664static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1665{
1666 VMCPU_ASSERT_EMT(pVCpu);
1667
1668 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1669 AssertReturn(pReNative, NULL);
1670
1671 /*
1672 * Try allocate all the buffers and stuff we need.
1673 */
1674 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1675 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1676 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1677#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1678 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1679#endif
1680 if (RT_LIKELY( pReNative->pInstrBuf
1681 && pReNative->paLabels
1682 && pReNative->paFixups)
1683#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1684 && pReNative->pDbgInfo
1685#endif
1686 )
1687 {
1688 /*
1689 * Set the buffer & array sizes on success.
1690 */
1691 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1692 pReNative->cLabelsAlloc = _8K;
1693 pReNative->cFixupsAlloc = _16K;
1694#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1695 pReNative->cDbgInfoAlloc = _16K;
1696#endif
1697
1698 /*
1699 * Done, just need to save it and reinit it.
1700 */
1701 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1702 return iemNativeReInit(pReNative, pTb);
1703 }
1704
1705 /*
1706 * Failed. Cleanup and return.
1707 */
1708 AssertFailed();
1709 RTMemFree(pReNative->pInstrBuf);
1710 RTMemFree(pReNative->paLabels);
1711 RTMemFree(pReNative->paFixups);
1712#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1713 RTMemFree(pReNative->pDbgInfo);
1714#endif
1715 RTMemFree(pReNative);
1716 return NULL;
1717}
1718
1719
1720/**
1721 * Creates a label.
1722 *
1723 * If the label does not yet have a defined position,
1724 * call iemNativeLabelDefine() later to set it.
1725 *
1726 * @returns Label ID. Throws VBox status code on failure, so no need to check
1727 * the return value.
1728 * @param pReNative The native recompile state.
1729 * @param enmType The label type.
1730 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1731 * label is not yet defined (default).
1732 * @param uData Data associated with the label. Only applicable to
1733 * certain types of labels. Default is zero.
1734 */
1735DECL_HIDDEN_THROW(uint32_t)
1736iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1737 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1738{
1739 /*
1740 * Locate existing label definition.
1741 *
1742 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1743 * and uData is zero.
1744 */
1745 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1746 uint32_t const cLabels = pReNative->cLabels;
1747 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1748#ifndef VBOX_STRICT
1749 && offWhere == UINT32_MAX
1750 && uData == 0
1751#endif
1752 )
1753 {
1754 /** @todo Since this is only used for labels with uData = 0, just use a
1755 * lookup array? */
1756 for (uint32_t i = 0; i < cLabels; i++)
1757 if ( paLabels[i].enmType == enmType
1758 && paLabels[i].uData == uData)
1759 {
1760#ifdef VBOX_STRICT
1761 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1762 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1763#endif
1764 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1765 return i;
1766 }
1767 }
1768
1769 /*
1770 * Make sure we've got room for another label.
1771 */
1772 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1773 { /* likely */ }
1774 else
1775 {
1776 uint32_t cNew = pReNative->cLabelsAlloc;
1777 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1778 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1779 cNew *= 2;
1780 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* the IEMNATIVEFIXUP::idxLabel type restricts this */
1781 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1782 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1783 pReNative->paLabels = paLabels;
1784 pReNative->cLabelsAlloc = cNew;
1785 }
1786
1787 /*
1788 * Define a new label.
1789 */
1790 paLabels[cLabels].off = offWhere;
1791 paLabels[cLabels].enmType = enmType;
1792 paLabels[cLabels].uData = uData;
1793 pReNative->cLabels = cLabels + 1;
1794
1795 Assert((unsigned)enmType < 64);
1796 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1797
1798 if (offWhere != UINT32_MAX)
1799 {
1800#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1801 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1802 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1803#endif
1804 }
1805 return cLabels;
1806}
1807
1808
1809/**
1810 * Defines the location of an existing label.
1811 *
1812 * @param pReNative The native recompile state.
1813 * @param idxLabel The label to define.
1814 * @param offWhere The position.
1815 */
1816DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1817{
1818 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1819 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1820 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1821 pLabel->off = offWhere;
1822#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1823 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1824 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1825#endif
1826}
1827
1828
1829/**
1830 * Looks up a label.
1831 *
1832 * @returns Label ID if found, UINT32_MAX if not.
1833 */
1834static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1835 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1836{
1837 Assert((unsigned)enmType < 64);
1838 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1839 {
1840 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1841 uint32_t const cLabels = pReNative->cLabels;
1842 for (uint32_t i = 0; i < cLabels; i++)
1843 if ( paLabels[i].enmType == enmType
1844 && paLabels[i].uData == uData
1845 && ( paLabels[i].off == offWhere
1846 || offWhere == UINT32_MAX
1847 || paLabels[i].off == UINT32_MAX))
1848 return i;
1849 }
1850 return UINT32_MAX;
1851}
1852
1853
1854/**
1855 * Adds a fixup.
1856 *
1857 * @throws VBox status code (int) on failure.
1858 * @param pReNative The native recompile state.
1859 * @param offWhere The instruction offset of the fixup location.
1860 * @param idxLabel The target label ID for the fixup.
1861 * @param enmType The fixup type.
1862 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1863 */
1864DECL_HIDDEN_THROW(void)
1865iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1866 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1867{
1868 Assert(idxLabel <= UINT16_MAX);
1869 Assert((unsigned)enmType <= UINT8_MAX);
1870
1871 /*
1872 * Make sure we've got room.
1873 */
1874 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1875 uint32_t const cFixups = pReNative->cFixups;
1876 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1877 { /* likely */ }
1878 else
1879 {
1880 uint32_t cNew = pReNative->cFixupsAlloc;
1881 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1882 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1883 cNew *= 2;
1884 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1885 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1886 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1887 pReNative->paFixups = paFixups;
1888 pReNative->cFixupsAlloc = cNew;
1889 }
1890
1891 /*
1892 * Add the fixup.
1893 */
1894 paFixups[cFixups].off = offWhere;
1895 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1896 paFixups[cFixups].enmType = enmType;
1897 paFixups[cFixups].offAddend = offAddend;
1898 pReNative->cFixups = cFixups + 1;
1899}
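

/*
 * Illustrative sketch: the typical forward-label pattern using
 * iemNativeLabelCreate, iemNativeAddFixup and iemNativeLabelDefine.  The fixup
 * type kIemNativeFixupType_Rel32 and the -4 addend are assumptions (AMD64-ish),
 * and the branch emitter itself is omitted.  Not part of the original file;
 * disabled, for documentation only.
 */
#if 0
static uint32_t iemNativeExampleForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType)
{
    /* Create the label without a position (offWhere = UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0 /*uData*/);

    /* After emitting the branch instruction, record a fixup against the label so
       the final assembly pass can patch in the real displacement. */
    iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4 /*offAddend, assumed*/);

    /* ... emit the code that the branch skips ... */

    /* Once the target position is known, pin the label to it. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif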
1900
1901
1902/**
1903 * Slow code path for iemNativeInstrBufEnsure.
1904 */
1905DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1906{
1907 /* Double the buffer size till we meet the request. */
1908 uint32_t cNew = pReNative->cInstrBufAlloc;
1909 AssertReturn(cNew > 0, NULL);
1910 do
1911 cNew *= 2;
1912 while (cNew < off + cInstrReq);
1913
1914 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1915#ifdef RT_ARCH_ARM64
1916 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1917#else
1918 uint32_t const cbMaxInstrBuf = _2M;
1919#endif
1920 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1921
1922 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1923 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1924
1925 pReNative->cInstrBufAlloc = cNew;
1926 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1927}
1928
1929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1930
1931/**
1932 * Grows the static debug info array used during recompilation.
1933 *
1934 * @returns Pointer to the new debug info block; throws VBox status code on
1935 * failure, so no need to check the return value.
1936 */
1937DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1938{
1939 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1940 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1941 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1942 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1943 pReNative->pDbgInfo = pDbgInfo;
1944 pReNative->cDbgInfoAlloc = cNew;
1945 return pDbgInfo;
1946}
1947
1948
1949/**
1950 * Adds a new debug info uninitialized entry, returning the pointer to it.
1951 */
1952DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1953{
1954 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1955 { /* likely */ }
1956 else
1957 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1958 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1959}
1960
1961
1962/**
1963 * Debug Info: Adds a native offset record, if necessary.
1964 */
1965static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1966{
1967 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1968
1969 /*
1970 * Search backwards to see if we've got a similar record already.
1971 */
1972 uint32_t idx = pDbgInfo->cEntries;
1973 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1974 while (idx-- > idxStop)
1975 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1976 {
1977 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1978 return;
1979 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1980 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1981 break;
1982 }
1983
1984 /*
1985 * Add it.
1986 */
1987 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1988 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1989 pEntry->NativeOffset.offNative = off;
1990}
1991
1992
1993/**
1994 * Debug Info: Record info about a label.
1995 */
1996static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
1997{
1998 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1999 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2000 pEntry->Label.uUnused = 0;
2001 pEntry->Label.enmLabel = (uint8_t)enmType;
2002 pEntry->Label.uData = uData;
2003}
2004
2005
2006/**
2007 * Debug Info: Record info about a threaded call.
2008 */
2009static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2010{
2011 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2012 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2013 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2014 pEntry->ThreadedCall.uUnused = 0;
2015 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2016}
2017
2018
2019/**
2020 * Debug Info: Record info about a new guest instruction.
2021 */
2022static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2023{
2024 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2025 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2026 pEntry->GuestInstruction.uUnused = 0;
2027 pEntry->GuestInstruction.fExec = fExec;
2028}
2029
2030
2031/**
2032 * Debug Info: Record info about guest register shadowing.
2033 */
2034static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2035 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2036{
2037 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2038 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2039 pEntry->GuestRegShadowing.uUnused = 0;
2040 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2041 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2042 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2043}
2044
2045#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2046
2047
2048/*********************************************************************************************************************************
2049* Register Allocator *
2050*********************************************************************************************************************************/
2051
2052/**
2053 * Register parameter indexes (indexed by argument number).
2054 */
2055DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2056{
2057 IEMNATIVE_CALL_ARG0_GREG,
2058 IEMNATIVE_CALL_ARG1_GREG,
2059 IEMNATIVE_CALL_ARG2_GREG,
2060 IEMNATIVE_CALL_ARG3_GREG,
2061#if defined(IEMNATIVE_CALL_ARG4_GREG)
2062 IEMNATIVE_CALL_ARG4_GREG,
2063# if defined(IEMNATIVE_CALL_ARG5_GREG)
2064 IEMNATIVE_CALL_ARG5_GREG,
2065# if defined(IEMNATIVE_CALL_ARG6_GREG)
2066 IEMNATIVE_CALL_ARG6_GREG,
2067# if defined(IEMNATIVE_CALL_ARG7_GREG)
2068 IEMNATIVE_CALL_ARG7_GREG,
2069# endif
2070# endif
2071# endif
2072#endif
2073};
2074
2075/**
2076 * Call register masks indexed by argument count.
2077 */
2078DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2079{
2080 0,
2081 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2082 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2083 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2084 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2085 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2086#if defined(IEMNATIVE_CALL_ARG4_GREG)
2087 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2088 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2089# if defined(IEMNATIVE_CALL_ARG5_GREG)
2090 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2091 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2092# if defined(IEMNATIVE_CALL_ARG6_GREG)
2093 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2094 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2095 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2096# if defined(IEMNATIVE_CALL_ARG7_GREG)
2097 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2098 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2099 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2100# endif
2101# endif
2102# endif
2103#endif
2104};
2105
2106#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2107/**
2108 * BP offset of the stack argument slots.
2109 *
2110 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2111 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2112 */
2113DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2114{
2115 IEMNATIVE_FP_OFF_STACK_ARG0,
2116# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2117 IEMNATIVE_FP_OFF_STACK_ARG1,
2118# endif
2119# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2120 IEMNATIVE_FP_OFF_STACK_ARG2,
2121# endif
2122# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2123 IEMNATIVE_FP_OFF_STACK_ARG3,
2124# endif
2125};
2126AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2127#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2128
2129/**
2130 * Info about shadowed guest register values.
2131 * @see IEMNATIVEGSTREG
2132 */
2133static struct
2134{
2135 /** Offset in VMCPU. */
2136 uint32_t off;
2137 /** The field size. */
2138 uint8_t cb;
2139 /** Name (for logging). */
2140 const char *pszName;
2141} const g_aGstShadowInfo[] =
2142{
2143#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2144 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2145 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2146 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2147 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2148 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2151 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2152 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2153 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2154 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2155 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2156 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2157 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2158 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2159 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2160 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2161 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2162 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2163 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2164 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2165 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2166 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2167 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2168 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2169 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2170 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2171 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2172 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2173 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2174 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2175 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2176 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2177 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2178 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2179 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2180#undef CPUMCTX_OFF_AND_SIZE
2181};
2182AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2183
2184
2185/** Host CPU general purpose register names. */
2186DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2187{
2188#ifdef RT_ARCH_AMD64
2189 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2190#elif defined(RT_ARCH_ARM64)
2191 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2192 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2193#else
2194# error "port me"
2195#endif
2196};
2197
2198
2199DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2200 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2201{
2202 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2203
2204 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2205 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2206 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2207 return (uint8_t)idxReg;
2208}
2209
2210
2211/**
2212 * Tries to locate a suitable register in the given register mask.
2213 *
2214 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2215 * failed.
2216 *
2217 * @returns Host register number on success, returns UINT8_MAX on failure.
2218 */
2219static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2220{
2221 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2222 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2223 if (fRegs)
2224 {
2225 /** @todo pick better here: */
2226 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2227
2228 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2229 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2230 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2231 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2232
2233 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2234 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2235 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2236 return idxReg;
2237 }
2238 return UINT8_MAX;
2239}
2240
2241
2242/**
2243 * Locate a register, possibly freeing one up.
2244 *
2245 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2246 * failed.
2247 *
2248 * @returns Host register number on success. Returns UINT8_MAX if no registers
2249 * found; the caller is supposed to deal with this and raise an
2250 * allocation type specific status code (if desired).
2251 *
2252 * @throws VBox status code if we run into trouble spilling a variable or
2253 * recording debug info. Does NOT throw anything if we're out of
2254 * registers, though.
2255 */
2256static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2257 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2258{
2259 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2260 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2261
2262 /*
2263 * Try a freed register that's shadowing a guest register
2264 */
2265 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2266 if (fRegs)
2267 {
2268 unsigned const idxReg = (fPreferVolatile
2269 ? ASMBitFirstSetU32(fRegs)
2270 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2271 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2272 - 1;
2273
2274 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2275 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2276 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2277 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2278
2279 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2280 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2281 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2282 return idxReg;
2283 }
2284
2285 /*
2286 * Try free up a variable that's in a register.
2287 *
2288 * We do two rounds here, first evacuating variables we don't need to be
2289 * saved on the stack, then in the second round moving things to the stack.
2290 */
2291 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2292 {
2293 uint32_t fVars = pReNative->Core.bmVars;
2294 while (fVars)
2295 {
2296 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2297 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2298 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2299 && (RT_BIT_32(idxReg) & fRegMask)
2300 && ( iLoop == 0
2301 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2302 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2303 {
2304 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2305 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2306 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2307 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2308 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2309
2310 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2311 {
2312 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
2313 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2314 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff,
2315 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2316 - IEMNATIVE_FP_OFF_STACK_VARS,
2317 idxReg);
2318 }
2319
2320 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2321 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2322 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2323 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2324 return idxReg;
2325 }
2326 fVars &= ~RT_BIT_32(idxVar);
2327 }
2328 }
2329
2330 return UINT8_MAX;
2331}
2332
2333
2334/**
2335 * Moves a variable to a different register or spills it onto the stack.
2336 *
2337 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2338 * kinds can easily be recreated if needed later.
2339 *
2340 * @returns The new code buffer position, UINT32_MAX on failure.
2341 * @param pReNative The native recompile state.
2342 * @param off The current code buffer position.
2343 * @param idxVar The variable index.
2344 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2345 * call-volatile registers.
2346 */
2347static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2348 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2349{
2350 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2351 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2352
2353 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2354 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2355 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2356 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2357 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2358 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2359 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2360 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2361
2362
2363 /** @todo Add statistics on this.*/
2364 /** @todo Implement basic variable liveness analysis (python) so variables
2365 * can be freed immediately once no longer used. Until then we risk
2366 * trashing registers and stack for dead variables. */
2367
2368 /*
2369 * First try move it to a different register, as that's cheaper.
2370 */
2371 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2372 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2373 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2374 if (fRegs)
2375 {
2376 /* Avoid using shadow registers, if possible. */
2377 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2378 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2379 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2380
2381 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2382 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2383 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2384 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2385 if (fGstRegShadows)
2386 {
2387 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2388 while (fGstRegShadows)
2389 {
2390 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2391 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2392
2393 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2394 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2395 }
2396 }
2397
2398 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2399 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2400 }
2401 /*
2402 * Otherwise we must spill the register onto the stack.
2403 */
2404 else
2405 {
2406 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2407 off = iemNativeEmitStoreGprByBp(pReNative, off,
2408 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2409 - IEMNATIVE_FP_OFF_STACK_VARS,
2410 idxRegOld);
2411
2412 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2413 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2414 }
2415
2416 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2417 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2418 return off;
2419}
2420
2421
2422/**
2423 * Allocates a temporary host general purpose register.
2424 *
2425 * This may emit code to save register content onto the stack in order to free
2426 * up a register.
2427 *
2428 * @returns The host register number; throws VBox status code on failure,
2429 * so no need to check the return value.
2430 * @param pReNative The native recompile state.
2431 * @param poff Pointer to the variable with the code buffer position.
2432 * This will be updated if we need to move a variable from
2433 * register to stack in order to satisfy the request.
2434 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2435 * registers (@c true, default) or the other way around
2436 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2437 */
2438DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2439{
2440 /*
2441 * Try find a completely unused register, preferably a call-volatile one.
2442 */
2443 uint8_t idxReg;
2444 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2445 & ~pReNative->Core.bmHstRegsWithGstShadow
2446 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2447 if (fRegs)
2448 {
2449 if (fPreferVolatile)
2450 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2451 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2452 else
2453 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2454 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2455 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2456 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2457 }
2458 else
2459 {
2460 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2461 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2462 }
2463 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2464}
2465
2466
2467/**
2468 * Allocates a temporary register for loading an immediate value into.
2469 *
2470 * This will emit code to load the immediate, unless there happens to be an
2471 * unused register with the value already loaded.
2472 *
2473 * The caller will not modify the returned register, it must be considered
2474 * read-only. Free using iemNativeRegFreeTmpImm.
2475 *
2476 * @returns The host register number; throws VBox status code on failure, so no
2477 * need to check the return value.
2478 * @param pReNative The native recompile state.
2479 * @param poff Pointer to the variable with the code buffer position.
2480 * @param uImm The immediate value that the register must hold upon
2481 * return.
2482 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2483 * registers (@c true, default) or the other way around
2484 * (@c false).
2485 *
2486 * @note Reusing immediate values has not been implemented yet.
2487 */
2488DECL_HIDDEN_THROW(uint8_t)
2489iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2490{
2491 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2492 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2493 return idxReg;
2494}
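

/*
 * Illustrative sketch: the allocation protocol for a temporary register holding
 * a constant, using only helpers from this file (iemNativeRegFreeTmp is defined
 * further down).  Not part of the original file; disabled, for documentation only.
 */
#if 0
static uint32_t iemNativeExampleTmpRegUse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Grab a scratch register, preferring call-volatile ones. */
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);

    /* Load a constant; the emitter returns the updated code buffer offset. */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, UINT64_C(0x1234));

    /* ... use idxRegTmp in further emitted code ... */

    /* Hand the register back; any guest shadow copies are left untouched. */
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    return off;
}
#endif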
2495
2496
2497/**
2498 * Marks host register @a idxHstReg as containing a shadow copy of guest
2499 * register @a enmGstReg.
2500 *
2501 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2502 * host register before calling.
2503 */
2504DECL_FORCE_INLINE(void)
2505iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2506{
2507 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2508
2509 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2510 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2511 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2512 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2513#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2514 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2515 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2516#else
2517 RT_NOREF(off);
2518#endif
2519}
2520
2521
2522/**
2523 * Clear any guest register shadow claims from @a idxHstReg.
2524 *
2525 * The register does not need to be shadowing any guest registers.
2526 */
2527DECL_FORCE_INLINE(void)
2528iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2529{
2530 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2531 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2532 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2533 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2534
2535#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2536 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2537 if (fGstRegs)
2538 {
2539 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2540 while (fGstRegs)
2541 {
2542 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2543 fGstRegs &= ~RT_BIT_64(iGstReg);
2544 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2545 }
2546 }
2547#else
2548 RT_NOREF(off);
2549#endif
2550
2551 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2552 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2553 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2554}
2555
2556
2557/**
2558 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2559 * to @a idxRegTo.
2560 */
2561DECL_FORCE_INLINE(void)
2562iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2563 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2564{
2565 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2566 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2567 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2568 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2569 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2570
2571 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2572 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2573 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2574#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2575 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2576 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2577#else
2578 RT_NOREF(off);
2579#endif
2580}
2581
2582
2583/**
2584 * Allocates a temporary host general purpose register for keeping a guest
2585 * register value.
2586 *
2587 * Since we may already have a register holding the guest register value,
2588 * code will be emitted to do the loading if that's not the case. Code may also
2589 * be emitted if we have to free up a register to satisfy the request.
2590 *
2591 * @returns The host register number; throws VBox status code on failure, so no
2592 * need to check the return value.
2593 * @param pReNative The native recompile state.
2594 * @param poff Pointer to the variable with the code buffer
2595 * position. This will be updated if we need to move a
2596 * variable from register to stack in order to satisfy
2597 * the request.
2598 * @param enmGstReg The guest register that is to be updated.
2599 * @param enmIntendedUse How the caller will be using the host register.
2600 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2601 */
2602DECL_HIDDEN_THROW(uint8_t)
2603iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2604 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2605{
2606 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2607#ifdef LOG_ENABLED
2608 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2609#endif
2610
2611 /*
2612 * First check if the guest register value is already in a host register.
2613 */
2614 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2615 {
2616 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2617 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2618 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2619 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2620
2621 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2622 {
2623 /*
2624 * If the register will trash the guest shadow copy, try to find a
2625 * completely unused register we can use instead. If that fails,
2626 * we need to disassociate the host reg from the guest reg.
2627 */
2628 /** @todo would be nice to know if preserving the register is in any way helpful. */
2629 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2630 && ( ~pReNative->Core.bmHstRegs
2631 & ~pReNative->Core.bmHstRegsWithGstShadow
2632 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2633 {
2634 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2635
2636 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2637
2638 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2639 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2640 g_apszIemNativeHstRegNames[idxRegNew]));
2641 idxReg = idxRegNew;
2642 }
2643 else
2644 {
2645 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2646 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2647 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2648 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2649 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2650 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2651 else
2652 {
2653 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2654 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2655 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2656 }
2657 }
2658 }
2659 else
2660 {
2661 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2662 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2663
2664 /*
2665 * Allocate a new register, copy the value and, if updating, the
2666 * guest shadow copy assignment to the new register.
2667 */
2668 /** @todo share register for readonly access. */
2669 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2670
2671 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2672
2673 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2674 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2675 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2676 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2677 else
2678 {
2679 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2680 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2681 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2682 g_apszIemNativeHstRegNames[idxRegNew]));
2683 }
2684 idxReg = idxRegNew;
2685 }
2686
2687#ifdef VBOX_STRICT
2688 /* Strict builds: Check that the value is correct. */
2689 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2690#endif
2691
2692 return idxReg;
2693 }
2694
2695 /*
2696 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
2697 */
2698 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2699
2700 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2701
2702 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2703 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2704 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2705 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2706
2707 return idxRegNew;
2708}
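

/*
 * Illustrative sketch: fetching the shadow copy of guest RAX for updating.  Real
 * callers are generated from the IEM_MC_* blocks, and the store back to CPUMCTX
 * after modifying the register is omitted here.  Not part of the original file;
 * disabled, for documentation only.
 */
#if 0
static uint32_t iemNativeExampleGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    IEMNATIVEGSTREG const enmGstReg = (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX);

    /* Reuses an existing shadow register when possible, otherwise loads rax from
       CPUMCTX into a newly allocated host register and records the shadowing. */
    uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off, enmGstReg, kIemNativeGstRegUse_ForUpdate);

    /* ... emit code updating idxRegRax and write it back to the guest context ... */

    iemNativeRegFreeTmp(pReNative, idxRegRax);
    return off;
}
#endif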
2709
2710
2711/**
2712 * Allocates a temporary host general purpose register that already holds the
2713 * given guest register value.
2714 *
2715 * The use case for this function is places where the shadowing state cannot be
2716 * modified due to branching and such. This will fail if we don't have a
2717 * current shadow copy handy or if it's incompatible. The only code that will
2718 * be emitted here is value checking code in strict builds.
2719 *
2720 * The intended use can only be readonly!
2721 *
2722 * @returns The host register number, UINT8_MAX if not present.
2723 * @param pReNative The native recompile state.
2724 * @param poff Pointer to the instruction buffer offset.
2725 * Will be updated in strict builds if a register is
2726 * found.
2727 * @param enmGstReg The guest register that is to be read.
2728 * @note In strict builds, this may throw instruction buffer growth failures.
2729 * Non-strict builds will not throw anything.
2730 * @sa iemNativeRegAllocTmpForGuestReg
2731 */
2732DECL_HIDDEN_THROW(uint8_t)
2733iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2734{
2735 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2736
2737 /*
2738 * First check if the guest register value is already in a host register.
2739 */
2740 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2741 {
2742 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2743 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2744 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2745 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2746
2747 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2748 {
2749 /*
2750 * We only do readonly use here, so easy compared to the other
2751 * variant of this code.
2752 */
2753 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2754 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2755 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2756 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2757 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2758
2759#ifdef VBOX_STRICT
2760 /* Strict builds: Check that the value is correct. */
2761 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2762#else
2763 RT_NOREF(poff);
2764#endif
2765 return idxReg;
2766 }
2767 }
2768
2769 return UINT8_MAX;
2770}
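

/*
 * Illustrative sketch: the fallback pattern for the read-only variant above.
 * When no usable shadow copy exists the function returns UINT8_MAX and the
 * caller loads the value itself.  Not part of the original file; disabled,
 * for documentation only.
 */
#if 0
static uint32_t iemNativeExampleMaybeShadowedPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t idxRegPc = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxRegPc == UINT8_MAX)
    {
        /* No shadow copy handy: allocate a temporary and load rip ourselves. */
        idxRegPc = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
        off      = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegPc, kIemNativeGstReg_Pc);
    }

    /* ... read-only use of idxRegPc ... */

    iemNativeRegFreeTmp(pReNative, idxRegPc);
    return off;
}
#endif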
2771
2772
2773DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2774
2775
2776/**
2777 * Allocates argument registers for a function call.
2778 *
2779 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2780 * need to check the return value.
2781 * @param pReNative The native recompile state.
2782 * @param off The current code buffer offset.
2783 * @param cArgs The number of arguments the function call takes.
2784 */
2785DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2786{
2787 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2788 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2789 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2790 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2791
2792 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2793 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2794 else if (cArgs == 0)
2795 return off;
2796
2797 /*
2798 * Do we get lucky and all registers are free and not shadowing anything?
2799 */
2800 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2801 for (uint32_t i = 0; i < cArgs; i++)
2802 {
2803 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2804 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2805 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2806 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2807 }
2808 /*
2809 * Okay, not lucky so we have to free up the registers.
2810 */
2811 else
2812 for (uint32_t i = 0; i < cArgs; i++)
2813 {
2814 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2815 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2816 {
2817 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2818 {
2819 case kIemNativeWhat_Var:
2820 {
2821 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2822 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2823 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2824 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2825 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2826
2827 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2828 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2829 else
2830 {
2831 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2832 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2833 }
2834 break;
2835 }
2836
2837 case kIemNativeWhat_Tmp:
2838 case kIemNativeWhat_Arg:
2839 case kIemNativeWhat_rc:
2840 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2841 default:
2842 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2843 }
2844
2845 }
2846 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2847 {
2848 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2849 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2850 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2851 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2852 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2853 }
2854 else
2855 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2856 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2857 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2858 }
2859 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2860 return off;
2861}
2862
2863
2864DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2865
2866
2867#if 0
2868/**
2869 * Frees a register assignment of any type.
2870 *
2871 * @param pReNative The native recompile state.
2872 * @param idxHstReg The register to free.
2873 *
2874 * @note Does not update variables.
2875 */
2876DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2877{
2878 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2879 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2880 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2881 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2882 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2883 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2884 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2885 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2886 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2887 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2888 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2889 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2890 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2891 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2892
2893 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2894 /* no flushing, right:
2895 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2896 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2897 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2898 */
2899}
2900#endif
2901
2902
2903/**
2904 * Frees a temporary register.
2905 *
2906 * Any shadow copies of guest registers assigned to the host register will not
2907 * be flushed by this operation.
2908 */
2909DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2910{
2911 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2912 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2913 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2914 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2915 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2916}
2917
2918
2919/**
2920 * Frees a temporary immediate register.
2921 *
2922 * It is assumed that the caller has not modified the register, so it still holds
2923 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2924 */
2925DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2926{
2927 iemNativeRegFreeTmp(pReNative, idxHstReg);
2928}
2929
2930
2931/**
2932 * Called right before emitting a call instruction to move anything important
2933 * out of call-volatile registers, free and flush the call-volatile registers,
2934 * optionally freeing argument variables.
2935 *
2936 * @returns New code buffer offset; throws VBox status code on error.
2937 * @param pReNative The native recompile state.
2938 * @param off The code buffer offset.
2939 * @param cArgs The number of arguments the function call takes.
2940 * It is presumed that the host registers for these have
2941 * already been allocated as argument registers and won't
2942 * need moving, just freeing.
2943 */
2944DECL_HIDDEN_THROW(uint32_t)
2945iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2946{
2947 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
2948
2949 /*
2950 * Move anything important out of volatile registers.
2951 */
2952 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2953 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2954 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2955#ifdef IEMNATIVE_REG_FIXED_TMP0
2956 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2957#endif
2958 & ~g_afIemNativeCallRegs[cArgs];
2959
2960 fRegsToMove &= pReNative->Core.bmHstRegs;
2961 if (!fRegsToMove)
2962 { /* likely */ }
2963 else
2964 while (fRegsToMove != 0)
2965 {
2966 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2967 fRegsToMove &= ~RT_BIT_32(idxReg);
2968
2969 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2970 {
2971 case kIemNativeWhat_Var:
2972 {
2973 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2974 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2975 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2976 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2977 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2978 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2979 else
2980 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2981 continue;
2982 }
2983
2984 case kIemNativeWhat_Arg:
2985 AssertMsgFailed(("What?!?: %u\n", idxReg));
2986 continue;
2987
2988 case kIemNativeWhat_rc:
2989 case kIemNativeWhat_Tmp:
2990 AssertMsgFailed(("Missing free: %u\n", idxReg));
2991 continue;
2992
2993 case kIemNativeWhat_FixedTmp:
2994 case kIemNativeWhat_pVCpuFixed:
2995 case kIemNativeWhat_pCtxFixed:
2996 case kIemNativeWhat_FixedReserved:
2997 case kIemNativeWhat_Invalid:
2998 case kIemNativeWhat_End:
2999 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3000 }
3001 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3002 }
3003
3004 /*
3005 * Do the actual freeing.
3006 */
3007 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3008
3009 /* If there are guest register shadows in any call-volatile register, we
3010 have to clear the corresponding guest register masks for each register. */
3011 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3012 if (fHstRegsWithGstShadow)
3013 {
3014 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3015 do
3016 {
3017 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3018 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3019
3020 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3021 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3022 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3023 } while (fHstRegsWithGstShadow != 0);
3024 }
3025
3026 return off;
3027}
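/* Illustrative usage sketch (see iemNativeEmitCImplCall and iemNativeEmitThreadedCall
   below for the real call sites); pfnHelper stands in for whatever worker is invoked:
        iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
        off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
        ... load any further arguments ...
        off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
        off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
 */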
3028
3029
3030/**
3031 * Flushes a set of guest register shadow copies.
3032 *
3033 * This is usually done after calling a threaded function or a C-implementation
3034 * of an instruction.
3035 *
3036 * @param pReNative The native recompile state.
3037 * @param fGstRegs Set of guest registers to flush.
3038 */
3039DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3040{
3041 /*
3042 * Reduce the mask by what's currently shadowed
3043 */
3044 fGstRegs &= pReNative->Core.bmGstRegShadows;
3045 if (fGstRegs)
3046 {
3047 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3048 if (pReNative->Core.bmGstRegShadows)
3049 {
3050 /*
3051 * Partial.
3052 */
3053 do
3054 {
3055 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3056 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3057 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3058 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3059 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3060
3061 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3062 fGstRegs &= ~fInThisHstReg;
3063 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3064 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3065 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3066 } while (fGstRegs != 0);
3067 }
3068 else
3069 {
3070 /*
3071 * Clear all.
3072 */
3073 do
3074 {
3075 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3076 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3077 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3078 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3079 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3080
3081 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3082 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3083 } while (fGstRegs != 0);
3084 pReNative->Core.bmHstRegsWithGstShadow = 0;
3085 }
3086 }
3087}
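/* Example (illustrative): after a call that may have modified RIP and EFLAGS, a caller
   would pass fGstRegs = RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags)
   to invalidate just those two shadow copies while leaving all other shadows intact. */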
3088
3089
3090/**
3091 * Flushes any delayed guest register writes.
3092 *
3093 * This must be called prior to calling CImpl functions and any helpers that use
3094 * the guest state (like raising exceptions) and such.
3095 *
3096 * This optimization has not yet been implemented. The first target would be
3097 * RIP updates, since these are the most common ones.
3098 */
3099DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3100{
3101 RT_NOREF(pReNative, off);
3102 return off;
3103}
3104
3105
3106/*********************************************************************************************************************************
3107* Code Emitters (larger snippets) *
3108*********************************************************************************************************************************/
3109
3110/**
3111 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3112 * extending to 64-bit width.
3113 *
3114 * @returns New code buffer offset; throws VBox status code on error.
3115 * @param pReNative The native recompile state.
3116 * @param off The current code buffer position.
3117 * @param idxHstReg The host register to load the guest register value into.
3118 * @param enmGstReg The guest register to load.
3119 *
3120 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3121 * that is something the caller needs to do if applicable.
3122 */
3123DECL_HIDDEN_THROW(uint32_t)
3124iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3125{
3126 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3127 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3128
3129 switch (g_aGstShadowInfo[enmGstReg].cb)
3130 {
3131 case sizeof(uint64_t):
3132 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3133 case sizeof(uint32_t):
3134 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3135 case sizeof(uint16_t):
3136 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3137#if 0 /* not present in the table. */
3138 case sizeof(uint8_t):
3139 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3140#endif
3141 default:
3142 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3143 }
3144}
3145
3146
3147#ifdef VBOX_STRICT
3148/**
3149 * Emits code that checks that the content of register @a idxReg is the same
3150 * as what's in the guest register @a enmGstReg, hitting a breakpoint
3151 * instruction if that's not the case.
3152 *
3153 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3154 * Trashes EFLAGS on AMD64.
3155 */
3156static uint32_t
3157iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3158{
3159# ifdef RT_ARCH_AMD64
3160 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3161
3162 /* cmp reg, [mem] */
3163 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3164 {
3165 if (idxReg >= 8)
3166 pbCodeBuf[off++] = X86_OP_REX_R;
3167 pbCodeBuf[off++] = 0x38;
3168 }
3169 else
3170 {
3171 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3172 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3173 else
3174 {
3175 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3176 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3177 else
3178 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3179 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3180 if (idxReg >= 8)
3181 pbCodeBuf[off++] = X86_OP_REX_R;
3182 }
3183 pbCodeBuf[off++] = 0x39;
3184 }
3185 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3186
3187 /* je/jz +1 */
3188 pbCodeBuf[off++] = 0x74;
3189 pbCodeBuf[off++] = 0x01;
3190
3191 /* int3 */
3192 pbCodeBuf[off++] = 0xcc;
3193
3194 /* For values smaller than the register size, we must check that the rest
3195 of the register is all zeros. */
3196 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3197 {
3198 /* test reg64, imm32 */
3199 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3200 pbCodeBuf[off++] = 0xf7;
3201 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3202 pbCodeBuf[off++] = 0;
3203 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3204 pbCodeBuf[off++] = 0xff;
3205 pbCodeBuf[off++] = 0xff;
3206
3207 /* je/jz +1 */
3208 pbCodeBuf[off++] = 0x74;
3209 pbCodeBuf[off++] = 0x01;
3210
3211 /* int3 */
3212 pbCodeBuf[off++] = 0xcc;
3213 }
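    /* For a 32-bit guest register the host register must be zero extended to 64 bits,
       so rotate the upper half down, test it for zero, and rotate it back up again. */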
3214 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3215 {
3216 /* rol reg64, 32 */
3217 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3218 pbCodeBuf[off++] = 0xc1;
3219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3220 pbCodeBuf[off++] = 32;
3221
3222 /* test reg32, ffffffffh */
3223 if (idxReg >= 8)
3224 pbCodeBuf[off++] = X86_OP_REX_B;
3225 pbCodeBuf[off++] = 0xf7;
3226 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3227 pbCodeBuf[off++] = 0xff;
3228 pbCodeBuf[off++] = 0xff;
3229 pbCodeBuf[off++] = 0xff;
3230 pbCodeBuf[off++] = 0xff;
3231
3232 /* je/jz +1 */
3233 pbCodeBuf[off++] = 0x74;
3234 pbCodeBuf[off++] = 0x01;
3235
3236 /* int3 */
3237 pbCodeBuf[off++] = 0xcc;
3238
3239 /* rol reg64, 32 */
3240 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3241 pbCodeBuf[off++] = 0xc1;
3242 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3243 pbCodeBuf[off++] = 32;
3244 }
3245
3246# elif defined(RT_ARCH_ARM64)
3247 /* mov TMP0, [gstreg] */
3248 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3249
3250 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3251 /* sub tmp0, tmp0, idxReg */
3252 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3253 /* cbz tmp0, +1 */
3254 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3255 /* brk #0x1000+enmGstReg */
3256 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3257
3258# else
3259# error "Port me!"
3260# endif
3261 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3262 return off;
3263}
3264#endif /* VBOX_STRICT */
3265
3266
3267
3268/**
3269 * Emits a code for checking the return code of a call and rcPassUp, returning
3270 * Emits code for checking the return code of a call and rcPassUp, returning
3271 * from the code if either is non-zero.
3272DECL_HIDDEN_THROW(uint32_t)
3273iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3274{
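    /* Note: VINF_SUCCESS is 0, so OR'ing the call status with iem.s.rcPassUp yields
       zero only when both are success; a single conditional branch to the
       NonZeroRetOrPassUp label therefore covers both checks. */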
3275#ifdef RT_ARCH_AMD64
3276 /*
3277 * AMD64: eax = call status code.
3278 */
3279
3280 /* edx = rcPassUp */
3281 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3282# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3283 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3284# endif
3285
3286 /* edx = eax | rcPassUp */
3287 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3288 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3289 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3290 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3291
3292 /* Jump to non-zero status return path. */
3293 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3294
3295 /* done. */
3296
3297#elif RT_ARCH_ARM64
3298 /*
3299 * ARM64: w0 = call status code.
3300 */
3301 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3302 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3303
3304 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3305
3306 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3307
3308 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3309 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3310 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3311
3312#else
3313# error "port me"
3314#endif
3315 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3316 return off;
3317}
3318
3319
3320/**
3321 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3322 * raising a \#GP(0) if it isn't.
3323 *
3324 * @returns New code buffer offset; throws VBox status code on error.
3325 * @param pReNative The native recompile state.
3326 * @param off The code buffer offset.
3327 * @param idxAddrReg The host register with the address to check.
3328 * @param idxInstr The current instruction.
3329 */
3330DECL_HIDDEN_THROW(uint32_t)
3331iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3332{
3333 RT_NOREF(idxInstr);
3334
3335 /*
3336 * Make sure we don't have any outstanding guest register writes as we may
3337 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3338 */
3339 off = iemNativeRegFlushPendingWrites(pReNative, off);
3340
3341#ifdef RT_ARCH_AMD64
3342 /*
3343 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3344 * return raisexcpt();
3345 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3346 */
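    /* Why this works: for a canonical address bits 63:47 all equal bit 47, so the high
       dword is either 0x00000000..0x00007fff or 0xffff8000..0xffffffff; adding 0x8000
       (with 32-bit wrap-around) maps both ranges into 0x0000..0xffff, and the final
       16-bit shift therefore yields zero exactly for canonical addresses. */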
3347 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3348
3349 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3350 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3351 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3352 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3353
3354# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3355 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3356# else
3357 uint32_t const offFixup = off;
3358 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3359 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3360 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3361 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3362# endif
3363
3364 iemNativeRegFreeTmp(pReNative, iTmpReg);
3365
3366#elif defined(RT_ARCH_ARM64)
3367 /*
3368 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3369 * return raisexcpt();
3370 * ----
3371 * mov x1, 0x800000000000
3372 * add x1, x0, x1
3373 * cmp xzr, x1, lsr 48
3374 * and either:
3375 * b.ne .Lraisexcpt
3376 * or:
3377 * b.eq .Lnoexcept
3378 * movz x1, #instruction-number
3379 * b .Lraisexcpt
3380 * .Lnoexcept:
3381 */
3382 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3383
3384 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3385 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3386 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3387
3388# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3389 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3390# else
3391 uint32_t const offFixup = off;
3392 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3393 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3394 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3395 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3396# endif
3397
3398 iemNativeRegFreeTmp(pReNative, iTmpReg);
3399
3400#else
3401# error "Port me"
3402#endif
3403 return off;
3404}
3405
3406
3407/**
3408 * Emits code to check if the content of @a idxAddrReg is within the limit of
3409 * idxSegReg, raising a \#GP(0) if it isn't.
3410 *
3411 * @returns New code buffer offset; throws VBox status code on error.
3412 * @param pReNative The native recompile state.
3413 * @param off The code buffer offset.
3414 * @param idxAddrReg The host register (32-bit) with the address to
3415 * check.
3416 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3417 * against.
3418 * @param idxInstr The current instruction.
3419 */
3420DECL_HIDDEN_THROW(uint32_t)
3421iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3422 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3423{
3424 /*
3425 * Make sure we don't have any outstanding guest register writes as we may
3426 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3427 */
3428 off = iemNativeRegFlushPendingWrites(pReNative, off);
3429
3430 /** @todo implement expand down/whatnot checking */
3431 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3432
3433 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3434 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3435 kIemNativeGstRegUse_ForUpdate);
3436
3437 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3438
3439#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3440 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3441 RT_NOREF(idxInstr);
3442#else
3443 uint32_t const offFixup = off;
3444 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3445 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3446 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3447 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3448#endif
3449
3450 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3451 return off;
3452}
3453
3454
3455/**
3456 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
3457 *
3458 * @returns The flush mask.
3459 * @param fCImpl The IEM_CIMPL_F_XXX flags.
3460 * @param fGstShwFlush The starting flush mask.
3461 */
3462DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
3463{
3464 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
3465 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
3466 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
3467 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
3468 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
3469 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
3470 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
3471 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
3472 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
3473 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
3474 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
3475 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
3476 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3477 return fGstShwFlush;
3478}
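/* Example: a far branch (IEM_CIMPL_F_BRANCH_FAR) adds the CS selector, base and limit
   shadows to the mask; together with the kIemNativeGstReg_Pc bit the callers OR in,
   both the new CS state and the new RIP get flushed before the call. */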
3479
3480
3481/**
3482 * Emits a call to a CImpl function or something similar.
3483 */
3484static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
3485 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3486 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3487{
3488 /*
3489 * Flush stuff.
3490 */
3491 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3492 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3493
3494 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3495
3496 /*
3497 * Load the parameters.
3498 */
3499#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3500 /* Special handling of the hidden VBOXSTRICTRC pointer. */
3501 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3502 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3503 if (cAddParams > 0)
3504 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3505 if (cAddParams > 1)
3506 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3507 if (cAddParams > 2)
3508 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3509 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3510
3511#else
3512 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3513 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3514 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3515 if (cAddParams > 0)
3516 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3517 if (cAddParams > 1)
3518 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3519 if (cAddParams > 2)
3520# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3521 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3522# else
3523 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3524# endif
3525#endif
3526
3527 /*
3528 * Make the call.
3529 */
3530 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3531
3532#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3533 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3534#endif
3535
3536 /*
3537 * Check the status code.
3538 */
3539 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3540}
3541
3542
3543/**
3544 * Emits a call to a threaded worker function.
3545 */
3546static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3547{
3548 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3549 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3550 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3551
3552#ifdef RT_ARCH_AMD64
3553 /* Load the parameters and emit the call. */
3554# ifdef RT_OS_WINDOWS
3555# ifndef VBOXSTRICTRC_STRICT_ENABLED
3556 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3557 if (cParams > 0)
3558 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3559 if (cParams > 1)
3560 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3561 if (cParams > 2)
3562 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3563# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3564 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3565 if (cParams > 0)
3566 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3567 if (cParams > 1)
3568 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3569 if (cParams > 2)
3570 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3571 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3572 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3573# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3574# else
3575 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3576 if (cParams > 0)
3577 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3578 if (cParams > 1)
3579 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3580 if (cParams > 2)
3581 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3582# endif
3583
3584 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3585
3586# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3587 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3588# endif
3589
3590#elif RT_ARCH_ARM64
3591 /*
3592 * ARM64:
3593 */
3594 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3595 if (cParams > 0)
3596 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3597 if (cParams > 1)
3598 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3599 if (cParams > 2)
3600 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3601
3602 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3603
3604#else
3605# error "port me"
3606#endif
3607
3608 /*
3609 * Check the status code.
3610 */
3611 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3612
3613 return off;
3614}
3615
3616
3617/**
3618 * Emits the code at the RaiseGP0 label.
3619 */
3620static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3621{
3622 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3623 if (idxLabel != UINT32_MAX)
3624 {
3625 iemNativeLabelDefine(pReNative, idxLabel, off);
3626
3627 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3628 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3629#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3630 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3631#endif
3632 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3633
3634 /* jump back to the return sequence. */
3635 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3636 }
3637 return off;
3638}
3639
3640
3641/**
3642 * Emits the code at the ReturnWithFlags label (returns
3643 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3644 */
3645static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3646{
3647 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3648 if (idxLabel != UINT32_MAX)
3649 {
3650 iemNativeLabelDefine(pReNative, idxLabel, off);
3651
3652 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3653
3654 /* jump back to the return sequence. */
3655 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3656 }
3657 return off;
3658}
3659
3660
3661/**
3662 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3663 */
3664static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3665{
3666 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3667 if (idxLabel != UINT32_MAX)
3668 {
3669 iemNativeLabelDefine(pReNative, idxLabel, off);
3670
3671 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3672
3673 /* jump back to the return sequence. */
3674 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3675 }
3676 return off;
3677}
3678
3679
3680/**
3681 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3682 */
3683static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3684{
3685 /*
3686 * Generate the rc + rcPassUp fiddling code if needed.
3687 */
3688 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3689 if (idxLabel != UINT32_MAX)
3690 {
3691 iemNativeLabelDefine(pReNative, idxLabel, off);
3692
3693 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3694#ifdef RT_ARCH_AMD64
3695# ifdef RT_OS_WINDOWS
3696# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3697 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3698# endif
3699 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3700 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3701# else
3702 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3703 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3704# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3705 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3706# endif
3707# endif
3708# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3709 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3710# endif
3711
3712#else
3713 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3714 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3715 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3716#endif
3717
3718 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3719 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3720 }
3721 return off;
3722}
3723
3724
3725/**
3726 * Emits a standard epilog.
3727 */
3728static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3729{
3730 *pidxReturnLabel = UINT32_MAX;
3731
3732 /*
3733 * Successful return, so clear the return register (eax, w0).
3734 */
3735 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
3736
3737 /*
3738 * Define label for common return point.
3739 */
3740 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3741 *pidxReturnLabel = idxReturn;
3742
3743 /*
3744 * Restore registers and return.
3745 */
3746#ifdef RT_ARCH_AMD64
3747 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3748
3749 /* Reposition rsp at the r15 restore point. */
3750 pbCodeBuf[off++] = X86_OP_REX_W;
3751 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3752 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3753 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3754
3755 /* Pop non-volatile registers and return */
3756 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3757 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3758 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3759 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3760 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3761 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3762 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3763 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3764# ifdef RT_OS_WINDOWS
3765 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3766 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3767# endif
3768 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3769 pbCodeBuf[off++] = 0xc9; /* leave */
3770 pbCodeBuf[off++] = 0xc3; /* ret */
3771 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3772
3773#elif RT_ARCH_ARM64
3774 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3775
3776 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3777 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3778 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3779 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3780 IEMNATIVE_FRAME_VAR_SIZE / 8);
3781 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3782 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3783 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3784 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3785 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3786 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3787 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3788 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3789 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3790 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3791 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3792 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3793
3794 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3795 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3796 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3797 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3798
3799 /* retab / ret */
3800# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3801 if (1)
3802 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3803 else
3804# endif
3805 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3806
3807#else
3808# error "port me"
3809#endif
3810 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3811
3812 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3813}
3814
3815
3816/**
3817 * Emits a standard prolog.
3818 */
3819static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3820{
3821#ifdef RT_ARCH_AMD64
3822 /*
3823 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3824 * reserving 64 bytes for stack variables plus 4 non-register argument
3825 * slots. Fixed register assignment: xBX = pVCpu;
3826 *
3827 * Since we always do the same register spilling, we can use the same
3828 * unwind description for all the code.
3829 */
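    /* Resulting frame layout (Windows shown; on gcc/Linux rsi/rdi are not saved and
     * r12..r15 follow rbx directly):
     *      rbp+08h: return address
     *      rbp+00h: saved rbp
     *      rbp-08h: saved rbx (IEMNATIVE_REG_FIXED_PVMCPU)
     *      rbp-10h: saved rsi
     *      rbp-18h: saved rdi
     *      rbp-20h..rbp-38h: saved r12..r15 (rbp-38h being IEMNATIVE_FP_OFF_LAST_PUSH)
     *      below that: the variable area and the stack/shadow argument slots.
     */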
3830 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3831 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3832 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3833 pbCodeBuf[off++] = 0x8b;
3834 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3835 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3836 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3837# ifdef RT_OS_WINDOWS
3838 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3839 pbCodeBuf[off++] = 0x8b;
3840 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3841 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3842 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3843# else
3844 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3845 pbCodeBuf[off++] = 0x8b;
3846 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3847# endif
3848 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3849 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3850 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3851 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3852 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3853 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3854 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3855 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3856
3857 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3858 X86_GREG_xSP,
3859 IEMNATIVE_FRAME_ALIGN_SIZE
3860 + IEMNATIVE_FRAME_VAR_SIZE
3861 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3862 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3863 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3864 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3865 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3866
3867#elif RT_ARCH_ARM64
3868 /*
3869 * We set up a stack frame exactly like on x86, only we have to push the
3870 * return address ourselves here. We save all non-volatile registers.
3871 */
3872 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3873
3874# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
3875 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
3876 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
3877 * in any way conditional, so just emit this instruction now and hope for the best... */
3878 /* pacibsp */
3879 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3880# endif
3881
3882 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3883 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3884 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3885 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3886 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3887 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3888 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3889 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3890 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3891 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3892 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3893 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3894 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3895 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3896 /* Save the BP and LR (ret address) registers at the top of the frame. */
3897 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3898 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3899 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3900 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3901 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3902 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3903
3904 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3905 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3906
3907 /* mov r28, r0 */
3908 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3909 /* mov r27, r1 */
3910 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3911
3912#else
3913# error "port me"
3914#endif
3915 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3916 return off;
3917}
3918
3919
3920
3921
3922/*********************************************************************************************************************************
3923* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
3924*********************************************************************************************************************************/
3925
3926#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3927 { \
3928 pReNative->fMc = (a_fMcFlags); \
3929 pReNative->fCImpl = (a_fCImplFlags); \
3930 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
3931
3932/** We have to get to the end in recompilation mode, as otherwise we won't
3933 * generate code for all the IEM_MC_IF_XXX branches. */
3934#define IEM_MC_END() \
3935 } return off
3936
3937
3938
3939/*********************************************************************************************************************************
3940* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
3941*********************************************************************************************************************************/
3942
3943#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
3944 pReNative->fMc = 0; \
3945 pReNative->fCImpl = (a_fFlags); \
3946 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3947
3948
3949#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3950 pReNative->fMc = 0; \
3951 pReNative->fCImpl = (a_fFlags); \
3952 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3953
3954DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3955 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3956 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3957{
3958 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3959}
3960
3961
3962#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3963 pReNative->fMc = 0; \
3964 pReNative->fCImpl = (a_fFlags); \
3965 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
3966 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3967
3968DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3969 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3970 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3971{
3972 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3973}
3974
3975
3976#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3977 pReNative->fMc = 0; \
3978 pReNative->fCImpl = (a_fFlags); \
3979 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
3980 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3981
3982DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3983 uint8_t idxInstr, uint64_t a_fGstShwFlush,
3984 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
3985 uint64_t uArg2)
3986{
3987 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3988}
3989
3990
3991
3992/*********************************************************************************************************************************
3993* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
3994*********************************************************************************************************************************/
3995
3996/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
3997 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
3998DECL_INLINE_THROW(uint32_t)
3999iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4000{
4001 /*
4002 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
4003 * return with a special status code and make the execution loop deal with
4004 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
4005 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
4006 * could continue w/o interruption, it would probably drop into the
4007 * debugger, so it's not worth the effort of trying to service it here; we
4008 * just lump it in with the handling of the others.
4009 *
4010 * To simplify the code and the register state management even more (wrt the
4011 * immediate in the AND operation), we always update the flags and skip the
4012 * conditional jump that checking for them first would require.
4013 */
4014 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
4015 <= UINT32_MAX);
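    /* The sequence below: (1) branch to the ReturnWithFlags label if TF or any of the
       debugger bits are set, (2) otherwise clear RF and the interrupt inhibit shadow
       bits and write EFLAGS back to CPUMCTX. */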
4016 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4017 kIemNativeGstRegUse_ForUpdate);
4018 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
4019 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
4020 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
4021 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
4022 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4023
4024 /* Free but don't flush the EFLAGS register. */
4025 iemNativeRegFreeTmp(pReNative, idxEflReg);
4026
4027 return off;
4028}
4029
4030
4031#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4032 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4033
4034#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4035 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4036 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4037
4038/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4039DECL_INLINE_THROW(uint32_t)
4040iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4041{
4042 /* Allocate a temporary PC register. */
4043 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4044
4045 /* Perform the addition and store the result. */
4046 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4047 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4048
4049 /* Free but don't flush the PC register. */
4050 iemNativeRegFreeTmp(pReNative, idxPcReg);
4051
4052 return off;
4053}
4054
4055
4056#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4057 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4058
4059#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4060 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4061 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4062
4063/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4064DECL_INLINE_THROW(uint32_t)
4065iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4066{
4067 /* Allocate a temporary PC register. */
4068 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4069
4070 /* Perform the addition and store the result. */
4071 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4072 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4073
4074 /* Free but don't flush the PC register. */
4075 iemNativeRegFreeTmp(pReNative, idxPcReg);
4076
4077 return off;
4078}
4079
4080
4081#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4082 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4083
4084#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4085 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4086 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4087
4088/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4089DECL_INLINE_THROW(uint32_t)
4090iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4091{
4092 /* Allocate a temporary PC register. */
4093 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4094
4095 /* Perform the addition and store the result. */
4096 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4097 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4098 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4099
4100 /* Free but don't flush the PC register. */
4101 iemNativeRegFreeTmp(pReNative, idxPcReg);
4102
4103 return off;
4104}
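/* Note: the three AddTo*AndFinishingNoFlags emitters above differ only in width
   handling: the 64-bit variant does a full 64-bit add, the 32-bit variant a 32-bit add
   (implicitly zero extending), and the 16-bit variant additionally clears bits 16 and
   up before storing the new value to rip. */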
4105
4106
4107
4108/*********************************************************************************************************************************
4109* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4110*********************************************************************************************************************************/
4111
4112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4114 (a_enmEffOpSize), pCallEntry->idxInstr)
4115
4116#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4117 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4118 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4119
4120#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4121 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4122 IEMMODE_16BIT, pCallEntry->idxInstr)
4123
4124#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4125 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4126 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4127
4128#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4129 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4130 IEMMODE_64BIT, pCallEntry->idxInstr)
4131
4132#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4133 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4134 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4135
4136/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4137 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4138 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4139DECL_INLINE_THROW(uint32_t)
4140iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4141 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4142{
4143 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4144
4145 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4146 off = iemNativeRegFlushPendingWrites(pReNative, off);
4147
4148 /* Allocate a temporary PC register. */
4149 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4150
4151 /* Perform the addition. */
4152 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4153
4154 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4155 {
4156 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4157 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4158 }
4159 else
4160 {
4161 /* Just truncate the result to 16-bit IP. */
4162 Assert(enmEffOpSize == IEMMODE_16BIT);
4163 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4164 }
4165 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4166
4167 /* Free but don't flush the PC register. */
4168 iemNativeRegFreeTmp(pReNative, idxPcReg);
4169
4170 return off;
4171}
4172
4173
4174#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4175 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4176 (a_enmEffOpSize), pCallEntry->idxInstr)
4177
4178#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4179 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4180 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4181
4182#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4183 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4184 IEMMODE_16BIT, pCallEntry->idxInstr)
4185
4186#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4187 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4188 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4189
4190#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4191 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4192 IEMMODE_32BIT, pCallEntry->idxInstr)
4193
4194#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4195 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4196 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4197
4198/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4199 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4200 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4201DECL_INLINE_THROW(uint32_t)
4202iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4203 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4204{
4205 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4206
4207 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4208 off = iemNativeRegFlushPendingWrites(pReNative, off);
4209
4210 /* Allocate a temporary PC register. */
4211 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4212
4213 /* Perform the addition. */
4214 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4215
4216 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4217 if (enmEffOpSize == IEMMODE_16BIT)
4218 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4219
4220 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4221 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4222
4223 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4224
4225 /* Free but don't flush the PC register. */
4226 iemNativeRegFreeTmp(pReNative, idxPcReg);
4227
4228 return off;
4229}
4230
4231
4232#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4233 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4234
4235#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4236 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4237 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4238
4239#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4240 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4241
4242#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4243 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4244 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4245
4246#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4247 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4248
4249#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4250 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4251 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4252
4253/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4254DECL_INLINE_THROW(uint32_t)
4255iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4256 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4257{
4258 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4259 off = iemNativeRegFlushPendingWrites(pReNative, off);
4260
4261 /* Allocate a temporary PC register. */
4262 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4263
4264 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4265 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4266 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4267 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4268 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4269
4270 /* Free but don't flush the PC register. */
4271 iemNativeRegFreeTmp(pReNative, idxPcReg);
4272
4273 return off;
4274}
4275
4276
4277
4278/*********************************************************************************************************************************
4279* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4280*********************************************************************************************************************************/
4281
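/*
 * Illustrative note (not part of the original source): these emitters back the
 * IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF macros used in the MC blocks. A
 * hypothetical MC fragment such as
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 *
 * thus expands into calls to iemNativeEmitIfEflagsBitSet, iemNativeEmitElse and
 * iemNativeEmitEndIf below; the do { } while (0) wrappers in the macros keep
 * the C block syntax intact while the condition stack tracks the register and
 * variable state snapshots that get merged at the endif.
 */
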
4282/**
4283 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4284 *
4285 * @returns Pointer to the new condition stack entry.
4286 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED (via longjmp) if nested too deeply.
4287 */
4288DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4289{
4290 uint32_t const idxStack = pReNative->cCondDepth;
4291 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4292
4293 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4294 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4295
4296 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4297 pEntry->fInElse = false;
4298 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4299 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4300
4301 return pEntry;
4302}
4303
4304
4305/**
4306 * Start of the if-block, snapshotting the register and variable state.
4307 */
4308DECL_INLINE_THROW(void)
4309iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4310{
4311 Assert(offIfBlock != UINT32_MAX);
4312 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4313 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4314 Assert(!pEntry->fInElse);
4315
4316 /* Define the start of the IF block if requested or for disassembly purposes. */
4317 if (idxLabelIf != UINT32_MAX)
4318 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4319#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4320 else
4321 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4322#else
4323 RT_NOREF(offIfBlock);
4324#endif
4325
4326 /* Copy the initial state so we can restore it in the 'else' block. */
4327 pEntry->InitialState = pReNative->Core;
4328}
4329
4330
4331#define IEM_MC_ELSE() } while (0); \
4332 off = iemNativeEmitElse(pReNative, off); \
4333 do {
4334
4335/** Emits code related to IEM_MC_ELSE. */
4336DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4337{
4338 /* Check sanity and get the conditional stack entry. */
4339 Assert(off != UINT32_MAX);
4340 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4341 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4342 Assert(!pEntry->fInElse);
4343
4344 /* Jump to the endif */
4345 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4346
4347 /* Define the else label and enter the else part of the condition. */
4348 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4349 pEntry->fInElse = true;
4350
4351 /* Snapshot the core state so we can do a merge at the endif and restore
4352 the snapshot we took at the start of the if-block. */
4353 pEntry->IfFinalState = pReNative->Core;
4354 pReNative->Core = pEntry->InitialState;
4355
4356 return off;
4357}
4358
4359
4360#define IEM_MC_ENDIF() } while (0); \
4361 off = iemNativeEmitEndIf(pReNative, off)
4362
4363/** Emits code related to IEM_MC_ENDIF. */
4364DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4365{
4366 /* Check sanity and get the conditional stack entry. */
4367 Assert(off != UINT32_MAX);
4368 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4369 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4370
4371 /*
4372 * Now we have to find common ground between the current core state and the
4373 * state at the end of the if-block. Use the smallest common denominator
4374 * and just drop anything that isn't the same in both states.
4375 */
4376 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4377 * which is why we're doing this at the end of the else-block.
4378 * But we'd need more info about the future code for that to be worth the effort. */
4379 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4380 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4381 {
4382 /* shadow guest stuff first. */
4383 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4384 if (fGstRegs)
4385 {
4386 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4387 do
4388 {
4389 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4390 fGstRegs &= ~RT_BIT_64(idxGstReg);
4391
4392 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4393 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4394 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4395 {
4396 Log12(("iemNativeEmitEndIf: dropping gst %#RX64 from hst %s\n",
4397 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4398 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4399 }
4400 } while (fGstRegs);
4401 }
4402 else
4403 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4404
4405 /* Check variables next. For now we must require them to be identical
4406 or stuff we can recreate. */
4407 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4408 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4409 if (fVars)
4410 {
4411 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4412 do
4413 {
4414 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4415 fVars &= ~RT_BIT_32(idxVar);
4416
4417 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4418 {
4419 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4420 continue;
4421 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4422 {
4423 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4424 if (idxHstReg != UINT8_MAX)
4425 {
4426 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4427 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4428 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4429 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4430 }
4431 continue;
4432 }
4433 }
4434 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4435 continue;
4436
4437 /* Irreconcilable, so drop it. */
4438 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4439 if (idxHstReg != UINT8_MAX)
4440 {
4441 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4442 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4443 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4444 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4445 }
4446 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4447 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4448 } while (fVars);
4449 }
4450
4451 /* Finally, check that the host register allocations matches. */
4452 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4453 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4454 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4455 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4456 }
4457
4458 /*
4459 * Define the endif label and maybe the else one if we're still in the 'if' part.
4460 */
4461 if (!pEntry->fInElse)
4462 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4463 else
4464 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4465 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4466
4467 /* Pop the conditional stack. */
4468 pReNative->cCondDepth -= 1;
4469
4470 return off;
4471}
4472
4473
4474#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4475 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4476 do {
4477
4478/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4479DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4480{
4481 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4482
4483 /* Get the eflags. */
4484 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4485 kIemNativeGstRegUse_ReadOnly);
4486
4487 /* Test and jump. */
4488 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4489
4490 /* Free but don't flush the EFlags register. */
4491 iemNativeRegFreeTmp(pReNative, idxEflReg);
4492
4493 /* Make a copy of the core state now as we start the if-block. */
4494 iemNativeCondStartIfBlock(pReNative, off);
4495
4496 return off;
4497}
4498
4499
4500#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4501 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4502 do {
4503
4504/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4505DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4506{
4507 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4508
4509 /* Get the eflags. */
4510 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4511 kIemNativeGstRegUse_ReadOnly);
4512
4513 /* Test and jump. */
4514 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4515
4516 /* Free but don't flush the EFlags register. */
4517 iemNativeRegFreeTmp(pReNative, idxEflReg);
4518
4519 /* Make a copy of the core state now as we start the if-block. */
4520 iemNativeCondStartIfBlock(pReNative, off);
4521
4522 return off;
4523}
4524
4525
4526#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4527 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4528 do {
4529
4530/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4531DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4532{
4533 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4534
4535 /* Get the eflags. */
4536 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4537 kIemNativeGstRegUse_ReadOnly);
4538
4539 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4540 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4541
4542 /* Test and jump. */
4543 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4544
4545 /* Free but don't flush the EFlags register. */
4546 iemNativeRegFreeTmp(pReNative, idxEflReg);
4547
4548 /* Make a copy of the core state now as we start the if-block. */
4549 iemNativeCondStartIfBlock(pReNative, off);
4550
4551 return off;
4552}
4553
4554
4555#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4556 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4557 do {
4558
4559/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4560DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4561{
4562 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4563
4564 /* Get the eflags. */
4565 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4566 kIemNativeGstRegUse_ReadOnly);
4567
4568 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4569 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4570
4571 /* Test and jump. */
4572 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4573
4574 /* Free but don't flush the EFlags register. */
4575 iemNativeRegFreeTmp(pReNative, idxEflReg);
4576
4577 /* Make a copy of the core state now as we start the if-block. */
4578 iemNativeCondStartIfBlock(pReNative, off);
4579
4580 return off;
4581}
4582
4583
4584#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4585 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4586 do {
4587
4588#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4589 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4590 do {
4591
4592/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4593DECL_INLINE_THROW(uint32_t)
4594iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4595 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4596{
4597 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4598
4599 /* Get the eflags. */
4600 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4601 kIemNativeGstRegUse_ReadOnly);
4602
4603 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4604 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4605
4606 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4607 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4608 Assert(iBitNo1 != iBitNo2);
4609
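 /* Added note: the idea is to isolate flag bit #1, shift it to bit #2's
    position and XOR it with EFLAGS; bit #2 of the temporary register is then
    set exactly when the two flag bits differ. */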
4610#ifdef RT_ARCH_AMD64
4611 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4612
4613 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4614 if (iBitNo1 > iBitNo2)
4615 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4616 else
4617 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4618 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4619
4620#elif defined(RT_ARCH_ARM64)
4621 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4622 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4623
4624 /* and tmpreg, eflreg, #1<<iBitNo1 */
4625 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4626
4627 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4628 if (iBitNo1 > iBitNo2)
4629 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4630 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4631 else
4632 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4633 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4634
4635 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4636
4637#else
4638# error "Port me"
4639#endif
4640
4641 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4642 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4643 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4644
4645 /* Free but don't flush the EFlags and tmp registers. */
4646 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4647 iemNativeRegFreeTmp(pReNative, idxEflReg);
4648
4649 /* Make a copy of the core state now as we start the if-block. */
4650 iemNativeCondStartIfBlock(pReNative, off);
4651
4652 return off;
4653}
4654
4655
4656#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4657 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4658 do {
4659
4660#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4661 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4662 do {
4663
4664/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4665 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4666DECL_INLINE_THROW(uint32_t)
4667iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4668 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4669{
4670 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4671
4672 /* We need an if-block label for the non-inverted variant. */
4673 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4674 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4675
4676 /* Get the eflags. */
4677 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4678 kIemNativeGstRegUse_ReadOnly);
4679
4680 /* Translate the flag masks to bit numbers. */
4681 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4682 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4683
4684 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4685 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4686 Assert(iBitNo1 != iBitNo);
4687
4688 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4689 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4690 Assert(iBitNo2 != iBitNo);
4691 Assert(iBitNo2 != iBitNo1);
4692
4693#ifdef RT_ARCH_AMD64
4694 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4695#elif defined(RT_ARCH_ARM64)
4696 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4697#endif
4698
4699 /* Check for the lone bit first. */
4700 if (!fInverted)
4701 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4702 else
4703 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4704
4705 /* Then extract and compare the other two bits. */
4706#ifdef RT_ARCH_AMD64
4707 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4708 if (iBitNo1 > iBitNo2)
4709 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4710 else
4711 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4712 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4713
4714#elif defined(RT_ARCH_ARM64)
4715 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4716
4717 /* and tmpreg, eflreg, #1<<iBitNo1 */
4718 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4719
4720 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4721 if (iBitNo1 > iBitNo2)
4722 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4723 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4724 else
4725 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4726 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4727
4728 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4729
4730#else
4731# error "Port me"
4732#endif
4733
4734 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4735 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4736 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4737
4738 /* Free but don't flush the EFlags and tmp registers. */
4739 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4740 iemNativeRegFreeTmp(pReNative, idxEflReg);
4741
4742 /* Make a copy of the core state now as we start the if-block. */
4743 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4744
4745 return off;
4746}
4747
4748
4749#define IEM_MC_IF_CX_IS_NZ() \
4750 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4751 do {
4752
4753/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4754DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4755{
4756 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4757
4758 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4759 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4760 kIemNativeGstRegUse_ReadOnly);
4761 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4762 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4763
4764 iemNativeCondStartIfBlock(pReNative, off);
4765 return off;
4766}
4767
4768
4769#define IEM_MC_IF_ECX_IS_NZ() \
4770 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4771 do {
4772
4773#define IEM_MC_IF_RCX_IS_NZ() \
4774 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4775 do {
4776
4777/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4778DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4779{
4780 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4781
4782 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4783 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4784 kIemNativeGstRegUse_ReadOnly);
4785 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4786 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4787
4788 iemNativeCondStartIfBlock(pReNative, off);
4789 return off;
4790}
4791
4792
4793#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4794 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4795 do {
4796
4797#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4798 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4799 do {
4800
4801/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4802DECL_INLINE_THROW(uint32_t)
4803iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4804{
4805 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4806
4807 /* We have to load both RCX and EFLAGS before we can start branching,
4808 otherwise we'll end up in the else-block with an inconsistent
4809 register allocator state.
4810 Doing EFLAGS first as it's more likely to be loaded, right? */
4811 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4812 kIemNativeGstRegUse_ReadOnly);
4813 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4814 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4815 kIemNativeGstRegUse_ReadOnly);
4816
4817 /** @todo we could reduce this to a single branch instruction by spending a
4818 * temporary register and some setnz stuff. Not sure if loops are
4819 * worth it. */
4820 /* Check CX. */
4821 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4822
4823 /* Check the EFlags bit. */
4824 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4825 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4826 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4827 !fCheckIfSet /*fJmpIfSet*/);
4828
4829 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4830 iemNativeRegFreeTmp(pReNative, idxEflReg);
4831
4832 iemNativeCondStartIfBlock(pReNative, off);
4833 return off;
4834}
4835
4836
4837#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4838 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
4839 do {
4840
4841#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4842 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
4843 do {
4844
4845#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4846 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
4847 do {
4848
4849#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4850 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
4851 do {
4852
4853/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
4854 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
4855 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
4856 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4857DECL_INLINE_THROW(uint32_t)
4858iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4859 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
4860{
4861 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4862
4863 /* We have to load both RCX and EFLAGS before we can start branching,
4864 otherwise we'll end up in the else-block with an inconsistent
4865 register allocator state.
4866 Doing EFLAGS first as it's more likely to be loaded, right? */
4867 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4868 kIemNativeGstRegUse_ReadOnly);
4869 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4870 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4871 kIemNativeGstRegUse_ReadOnly);
4872
4873 /** @todo we could reduce this to a single branch instruction by spending a
4874 * temporary register and some setnz stuff. Not sure if loops are
4875 * worth it. */
4876 /* Check RCX/ECX. */
4877 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4878
4879 /* Check the EFlags bit. */
4880 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4881 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4882 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4883 !fCheckIfSet /*fJmpIfSet*/);
4884
4885 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4886 iemNativeRegFreeTmp(pReNative, idxEflReg);
4887
4888 iemNativeCondStartIfBlock(pReNative, off);
4889 return off;
4890}
4891
4892
4893
4894/*********************************************************************************************************************************
4895* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
4896*********************************************************************************************************************************/
4897/** Number of hidden arguments for CIMPL calls.
4898 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
4899#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4900# define IEM_CIMPL_HIDDEN_ARGS 3
4901#else
4902# define IEM_CIMPL_HIDDEN_ARGS 2
4903#endif
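
/* Added note: the hidden arguments are pVCpu and cbInstr; in the strict status
   code case on Windows/AMD64 a pointer to the VBOXSTRICTRC return buffer is
   passed as well (see the argument loading in iemNativeEmitCallCImplCommon). */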
4904
4905#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
4906 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
4907
4908#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
4909 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
4910
4911#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
4912 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
4913
4914#define IEM_MC_LOCAL(a_Type, a_Name) \
4915 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
4916
4917#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
4918 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
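
/* Illustrative note (hypothetical fragment, not from the source): declarations
 * like
 *      IEM_MC_ARG(uint16_t, u16Value, 0);
 *      IEM_MC_LOCAL(uint64_t, uTmp);
 * therefore become uint8_t variable indices into pReNative->Core.aVars rather
 * than real C variables; the allocators below assign stack slots and host
 * registers to them as needed. */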
4919
4920
4921/**
4922 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
4923 */
4924DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
4925{
4926 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
4927 return IEM_CIMPL_HIDDEN_ARGS;
4928 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
4929 return 1;
4930 return 0;
4931}
4932
4933
4934/**
4935 * Internal work that allocates a variable with kind set to
4936 * kIemNativeVarKind_Invalid and no current stack allocation.
4937 *
4938 * The kind will either be set by the caller or later when the variable is first
4939 * assigned a value.
4940 */
4941static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
4942{
4943 Assert(cbType > 0 && cbType <= 64);
4944 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
4945 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
4946 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
4947 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
4948 pReNative->Core.aVars[idxVar].cbVar = cbType;
4949 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
4950 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4951 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
4952 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
4953 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
4954 pReNative->Core.aVars[idxVar].u.uValue = 0;
4955 return idxVar;
4956}
4957
4958
4959/**
4960 * Internal work that allocates an argument variable w/o setting enmKind.
4961 */
4962static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
4963{
4964 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
4965 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
4966 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
4967
4968 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
4969 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
4970 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
4971 return idxVar;
4972}
4973
4974
4975/**
4976 * Changes the variable to a stack variable.
4977 *
4978 * Currently this is s only possible to do the first time the variable is used,
4979 * switching later is can be implemented but not done.
4980 *
4981 * @param pReNative The recompiler state.
4982 * @param idxVar The variable.
4983 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
4984 */
4985static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4986{
4987 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
4988 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4989 {
4990 /* We could in theory transition from immediate to stack as well, but it
4991 would involve the caller doing work storing the value on the stack. So,
4992 till that's required we only allow transition from invalid. */
4993 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
4994 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
4995 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
4996
4997 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
4998 {
4999 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
5000 {
5001 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
5002 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5003 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
5004 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5005 return;
5006 }
5007 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;*/
5008 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
5009 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
5010 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
5011 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
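 /* Added note: each stack slot is 8 bytes (see the idxStackSlot * sizeof(uint64_t)
    addressing in iemNativeEmitCallCommon), so e.g. a 32 byte variable needs
    fBitAllocMask = 0xf (4 slots) at a 4-slot aligned position (fBitAlignMask = 3). */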
5012 uint32_t bmStack = ~pReNative->Core.bmStack;
5013 while (bmStack != UINT32_MAX)
5014 {
5015 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
5016 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5017 if (!(iSlot & fBitAlignMask))
5018 {
5019 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
5020 {
5021 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
5022 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5023 return;
5024 }
5025 }
5026 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
5027 }
5028 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5029 }
5030 }
5031}
5032
5033
5034/**
5035 * Changes it to a variable with a constant value.
5036 *
5037 * This does not require stack storage as we know the value and can always
5038 * reload it, unless of course it's referenced.
5039 *
5040 * @param pReNative The recompiler state.
5041 * @param idxVar The variable.
5042 * @param uValue The immediate value.
5043 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5044 */
5045static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5046{
5047 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5048 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5049 {
5050 /* Only simple transitions for now. */
5051 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5052 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5053 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5054 }
5055 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5056}
5057
5058
5059/**
5060 * Changes the variable to a reference (pointer) to @a idxOtherVar.
5061 *
5062 * @param pReNative The recompiler state.
5063 * @param idxVar The variable.
5064 * @param idxOtherVar The variable to take the (stack) address of.
5065 *
5066 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5067 */
5068static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5069{
5070 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5071 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5072
5073 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5074 {
5075 /* Only simple transitions for now. */
5076 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5077 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5078 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5079 }
5080 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5081
5082 /* Update the other variable, ensure it's a stack variable. */
5083 /** @todo handle variables with const values... that'll go boom now. */
5084 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5085 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5086}
5087
5088
5089DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5090{
5091 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5092}
5093
5094
5095DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5096{
5097 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5098 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5099 return idxVar;
5100}
5101
5102
5103DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5104{
5105 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5106 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5107 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5108 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5109
5110 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5111 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5112 return idxArgVar;
5113}
5114
5115
5116DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5117{
5118 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5119 iemNativeVarSetKindToStack(pReNative, idxVar);
5120 return idxVar;
5121}
5122
5123
5124DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5125{
5126 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5127 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5128 return idxVar;
5129}
5130
5131
5132/**
5133 * Makes sure variable @a idxVar has a register assigned to it.
5134 *
5135 * @returns The host register number.
5136 * @param pReNative The recompiler state.
5137 * @param idxVar The variable.
5138 * @param poff Pointer to the instruction buffer offset.
5139 * In case a register needs to be freed up.
5140 */
5141DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5142{
5143 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5144
5145 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5146 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5147 return idxReg;
5148
5149 /*
5150 * We have to allocate a register for the variable, even if it's a stack one,
5151 * as we don't know whether it will be modified before it is finalized
5152 * (todo: analyze and insert hints about that?).
5153 *
5154 * If we can, we try to get the correct register for argument variables. This
5155 * assumes that most argument variables are fetched as close as possible to
5156 * the actual call, so that there aren't any interfering hidden calls
5157 * (memory accesses, etc.) in between.
5158 *
5159 * If we cannot, or it's a regular (non-argument) variable, we make sure no
5160 * argument registers that will be used by this MC block get allocated here,
5161 * and we always prefer non-volatile registers to avoid needing to spill
5162 * stuff for internal calls.
5163 */
5164 /** @todo Detect too early argument value fetches and warn about hidden
5165 * calls causing less optimal code to be generated in the python script. */
5166
5167 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5168 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5169 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5170 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5171 else
5172 {
5173 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5174 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5175 & ~pReNative->Core.bmHstRegsWithGstShadow
5176 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5177 & fNotArgsMask;
5178 if (fRegs)
5179 {
5180 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
5181 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5182 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5183 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5184 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5185 }
5186 else
5187 {
5188 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5189 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5190 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5191 }
5192 }
5193 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5194 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5195 return idxReg;
5196}
5197
5198
5199
5200/*********************************************************************************************************************************
5201* Emitters for IEM_MC_CALL_CIMPL_XXX *
5202*********************************************************************************************************************************/
5203
5204/**
5205 * Emits code to load a reference to the given guest register into @a idxGprDst.
5206 */
5207DECL_INLINE_THROW(uint32_t)
5208iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5209 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5210{
5211 /*
5212 * Get the offset relative to the CPUMCTX structure.
5213 */
5214 uint32_t offCpumCtx;
5215 switch (enmClass)
5216 {
5217 case kIemNativeGstRegRef_Gpr:
5218 Assert(idxRegInClass < 16);
5219 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5220 break;
5221
5222 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
5223 Assert(idxRegInClass < 4);
5224 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5225 break;
5226
5227 case kIemNativeGstRegRef_EFlags:
5228 Assert(idxRegInClass == 0);
5229 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5230 break;
5231
5232 case kIemNativeGstRegRef_MxCsr:
5233 Assert(idxRegInClass == 0);
5234 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5235 break;
5236
5237 case kIemNativeGstRegRef_FpuReg:
5238 Assert(idxRegInClass < 8);
5239 AssertFailed(); /** @todo what kind of indexing? */
5240 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5241 break;
5242
5243 case kIemNativeGstRegRef_MReg:
5244 Assert(idxRegInClass < 8);
5245 AssertFailed(); /** @todo what kind of indexing? */
5246 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5247 break;
5248
5249 case kIemNativeGstRegRef_XReg:
5250 Assert(idxRegInClass < 16);
5251 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5252 break;
5253
5254 default:
5255 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5256 }
5257
5258 /*
5259 * Load the value into the destination register.
5260 */
5261#ifdef RT_ARCH_AMD64
5262 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5263
5264#elif defined(RT_ARCH_ARM64)
5265 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5266 Assert(offCpumCtx < 4096);
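 /* Added note: ADD (immediate) on ARM64 only encodes a 12-bit unsigned
    immediate, hence the offCpumCtx < 4096 assertion above. */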
5267 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5268
5269#else
5270# error "Port me!"
5271#endif
5272
5273 return off;
5274}
5275
5276
5277/**
5278 * Common code for CIMPL and AIMPL calls.
5279 *
5280 * These are calls that use argument variables and such. They should not be
5281 * confused with internal calls required to implement an MC operation,
5282 * like a TLB load and similar.
5283 *
5284 * Upon return all that is left to do is to load any hidden arguments and
5285 * perform the call. All argument variables are freed.
5286 *
5287 * @returns New code buffer offset; throws VBox status code on error.
5288 * @param pReNative The native recompile state.
5289 * @param off The code buffer offset.
5290 * @param cArgs The total number of arguments (includes hidden
5291 * count).
5292 * @param cHiddenArgs The number of hidden arguments. The hidden
5293 * arguments must not have any variable declared for
5294 * them, whereas all the regular arguments must
5295 * (tstIEMCheckMc ensures this).
5296 */
5297DECL_HIDDEN_THROW(uint32_t)
5298iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5299{
5300#ifdef VBOX_STRICT
5301 /*
5302 * Assert sanity.
5303 */
5304 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5305 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5306 for (unsigned i = 0; i < cHiddenArgs; i++)
5307 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5308 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5309 {
5310 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5311 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5312 }
5313#endif
5314
5315 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
5316
5317 /*
5318 * First, go over the host registers that will be used for arguments and make
5319 * sure they either hold the desired argument or are free.
5320 */
5321 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
5322 for (uint32_t i = 0; i < cRegArgs; i++)
5323 {
5324 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5325 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5326 {
5327 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
5328 {
5329 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
5330 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5331 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
5332 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5333 if (uArgNo == i)
5334 { /* perfect */ }
5335 else
5336 {
5337 /* The variable allocator logic should make sure this is impossible. */
5338 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
5339
5340 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5341 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5342 else
5343 {
5344 /* just free it, can be reloaded if used again */
5345 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5346 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
5347 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
5348 }
5349 }
5350 }
5351 else
5352 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
5353 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
5354 }
5355 }
5356
5357 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
5358
5359 /*
5360 * Make sure the argument variables are loaded into their respective registers.
5361 *
5362 * We can optimize this by ASSUMING that any register allocations are for
5363 * registers that have already been loaded and are ready. The previous step
5364 * saw to that.
5365 */
5366 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
5367 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5368 {
5369 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5370 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5371 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
5372 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
5373 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
5374 else
5375 {
5376 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5377 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5378 {
5379 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5380 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
5381 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
5382 | RT_BIT_32(idxArgReg);
5383 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
5384 }
5385 else
5386 {
5387 /* Use ARG0 as temp for stuff we need registers for. */
5388 switch (pReNative->Core.aVars[idxVar].enmKind)
5389 {
5390 case kIemNativeVarKind_Stack:
5391 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5392 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5393 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg,
5394 IEMNATIVE_FP_OFF_STACK_VARS
5395 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5396 continue;
5397
5398 case kIemNativeVarKind_Immediate:
5399 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
5400 continue;
5401
5402 case kIemNativeVarKind_VarRef:
5403 {
5404 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5405 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5406 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5407 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5408 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
5409 IEMNATIVE_FP_OFF_STACK_VARS
5410 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5411 continue;
5412 }
5413
5414 case kIemNativeVarKind_GstRegRef:
5415 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
5416 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5417 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5418 continue;
5419
5420 case kIemNativeVarKind_Invalid:
5421 case kIemNativeVarKind_End:
5422 break;
5423 }
5424 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5425 }
5426 }
5427 }
5428#ifdef VBOX_STRICT
5429 else
5430 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5431 {
5432 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
5433 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
5434 }
5435#endif
5436
5437#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
5438 /*
5439 * If there are any stack arguments, make sure they are in their place as well.
5440 *
5441 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register here since the
5442 * caller will load it later and it must be free (see the first loop).
5443 */
5444 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
5445 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
5446 {
5447 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5448 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
5449 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5450 {
5451 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
5452 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
5453 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
5454 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5455 }
5456 else
5457 {
5458 /* Use ARG0 as temp for stuff we need registers for. */
5459 switch (pReNative->Core.aVars[idxVar].enmKind)
5460 {
5461 case kIemNativeVarKind_Stack:
5462 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5463 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5464 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
5465 IEMNATIVE_FP_OFF_STACK_VARS
5466 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5467 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5468 continue;
5469
5470 case kIemNativeVarKind_Immediate:
5471 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
5472 continue;
5473
5474 case kIemNativeVarKind_VarRef:
5475 {
5476 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5477 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5478 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5479 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5480 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5481 IEMNATIVE_FP_OFF_STACK_VARS
5482 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5483 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5484 continue;
5485 }
5486
5487 case kIemNativeVarKind_GstRegRef:
5488 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5489 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5490 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5491 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5492 continue;
5493
5494 case kIemNativeVarKind_Invalid:
5495 case kIemNativeVarKind_End:
5496 break;
5497 }
5498 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5499 }
5500 }
5501#else
5502 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
5503#endif
5504
5505 /*
5506 * Free all argument variables (simplified).
5507 * Their lifetime always expires with the call they are for.
5508 */
5509 /** @todo Make the python script check that arguments aren't used after
5510 * IEM_MC_CALL_XXXX. */
5511 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends
5512 * requiring an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call,
5513 * typically with an argument value. */
5514 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
5515 {
5516 uint8_t idxVar = pReNative->Core.aidxArgVars[i];
5517 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5518 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
5519 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5520 }
5521 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
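 /* (I.e. every aidxArgVars entry is back at UINT8_MAX; the array presumably
    overlays u64ArgVars, hence the single 64-bit compare above.) */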
5522
5523 /*
5524 * Flush volatile registers as we make the call.
5525 */
5526 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
5527
5528 return off;
5529}
5530
5531
5532/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
5533DECL_HIDDEN_THROW(uint32_t)
5534iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5535 uintptr_t pfnCImpl, uint8_t cArgs)
5536
5537{
5538 /*
5539 * Do all the call setup and cleanup.
5540 */
5541 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
5542
5543 /*
5544 * Load the two hidden arguments.
5545 */
5546#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5547 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5548 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5549 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
5550#else
5551 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5552 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
5553#endif
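 /* Thus the effective call below is pfnCImpl(pVCpu, cbInstr, <user args>) in the
    normal case; in the strict-RC Windows/AMD64 case an extra first parameter points
    at a stack slot that receives the VBOXSTRICTRC value (presumably because it is
    returned via a hidden pointer there), which is reloaded into eax after the call. */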
5554
5555 /*
5556 * Make the call and check the return code.
5557 *
5558 * Shadow PC copies are always flushed here, other stuff depends on flags.
5559 * Segment and general purpose registers are explicitly flushed via the
5560 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
5561 * macros.
5562 */
5563 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
5564#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5565 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5566#endif
5567/** @todo Always flush EFLAGS if this is an xxF variation. */
5568 iemNativeRegFlushGuestShadows(pReNative,
5569 iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, RT_BIT_64(kIemNativeGstReg_Pc)) );
5570
5571 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5572}
5573
5574
5575#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
5576 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0)
5577
5578/** Emits code for IEM_MC_CALL_CIMPL_1. */
5579DECL_INLINE_THROW(uint32_t)
5580iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5581 uintptr_t pfnCImpl, uint8_t idxArg0)
5582{
5583 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5584 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5585 RT_NOREF_PV(idxArg0);
5586
5587 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 1);
5588}
5589
5590
5591#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
5592 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1)
5593
5594/** Emits code for IEM_MC_CALL_CIMPL_2. */
5595DECL_INLINE_THROW(uint32_t)
5596iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5597 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
5598{
5599 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5600 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5601 RT_NOREF_PV(idxArg0);
5602
5603 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5604 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5605 RT_NOREF_PV(idxArg1);
5606
5607 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 2);
5608}
5609
5610
5611#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
5612 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2)
5613
5614/** Emits code for IEM_MC_CALL_CIMPL_3. */
5615DECL_INLINE_THROW(uint32_t)
5616iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5617 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
5618{
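/* Note: the unindented 0xcc below (also present in the 4 and 5 argument variants) looks
   like a temporary int3 marker so these presumably untested paths trap when first hit. */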
5619pReNative->pInstrBuf[off++] = 0xcc;
5620 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5621 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5622 RT_NOREF_PV(idxArg0);
5623
5624 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5625 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5626 RT_NOREF_PV(idxArg1);
5627
5628 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5629 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5630 RT_NOREF_PV(idxArg2);
5631
5632 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 3);
5633}
5634
5635
5636#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2, a3) \
5637 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
5638
5639/** Emits code for IEM_MC_CALL_CIMPL_4. */
5640DECL_INLINE_THROW(uint32_t)
5641iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5642 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
5643{
5644pReNative->pInstrBuf[off++] = 0xcc;
5645 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5646 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5647 RT_NOREF_PV(idxArg0);
5648
5649 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5650 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5651 RT_NOREF_PV(idxArg1);
5652
5653 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5654 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5655 RT_NOREF_PV(idxArg2);
5656
5657 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5658 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5659 RT_NOREF_PV(idxArg3);
5660
5661 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 4);
5662}
5663
5664
5665#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2, a3, a4) \
5666 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
5667
5668/** Emits code for IEM_MC_CALL_CIMPL_5. */
5669DECL_INLINE_THROW(uint32_t)
5670iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5671 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
5672{
5673pReNative->pInstrBuf[off++] = 0xcc;
5674 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5675 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5676 RT_NOREF_PV(idxArg0);
5677
5678 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5679 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5680 RT_NOREF_PV(idxArg1);
5681
5682 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5683 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5684 RT_NOREF_PV(idxArg2);
5685
5686 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5687 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5688 RT_NOREF_PV(idxArg3);
5689
5690 Assert(idxArg4 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg4)));
5691 Assert(pReNative->Core.aVars[idxArg4].uArgNo == 4 + IEM_CIMPL_HIDDEN_ARGS);
5692 RT_NOREF_PV(idxArg4);
5693
5694 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 5);
5695}
5696
5697
5698/** Flush guest GPR shadow copy. */
5699#define IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG(a_iGReg) \
5700 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_GprFirst + (a_iGReg)) )
5701
5702/** Flush guest segment register (hidden and non-hidden bits) shadow copy. */
5703#define IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG(a_iSReg) \
5704 iemNativeRegFlushGuestShadows(pReNative, \
5705 RT_BIT_64(kIemNativeGstReg_SegSelFirst + (a_iSReg)) \
5706 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + (a_iSReg)) \
5707 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + (a_iSReg)) )
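/* Example usage (sketch): a cimpl that reloads SS would typically be followed by
   IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG(X86_SREG_SS) so stale shadow copies of the SS
   selector, base and limit are dropped rather than reused. */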
5708
5709
5710
5711/*********************************************************************************************************************************
5712* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
5713*********************************************************************************************************************************/
5714
5715#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
5716 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
5717
5718/** Emits code for IEM_MC_FETCH_GREG_U16. */
5719DECL_INLINE_THROW(uint32_t)
5720iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
5721{
5722 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
5723 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
5724
5725 /*
5726 * We can either just load the low 16-bit of the GPR into a host register
5727 * for the variable, or we can do so via a shadow copy host register. The
5728 * latter will avoid having to reload it if it's being stored later, but
5729 * will waste a host register if it isn't touched again. Since we don't
5730 * know what's going to happen, we choose the latter for now.
5731 */
5732 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5733 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5734 kIemNativeGstRegUse_ReadOnly);
5735
5736 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5737 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
5738 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
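 /* Note: iemNativeEmitLoadGprFromGpr16 above presumably emits a zero-extending 16-bit
    move (e.g. movzx on AMD64), so the variable register ends up with the low word of
    the guest GPR and zero in the upper bits. */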
5739
5740 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
5741 return off;
5742}
5743
5744
5745
5746/*********************************************************************************************************************************
5747* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
5748*********************************************************************************************************************************/
5749
5750#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
5751 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
5752
5753/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
5754DECL_INLINE_THROW(uint32_t)
5755iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
5756{
5757 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5758 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + (iGRegEx & 15)),
5759 kIemNativeGstRegUse_ForUpdate);
5760#ifdef RT_ARCH_AMD64
5761 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5762
5763 /* To the lowest byte of the register: mov r8, imm8 */
5764 if (iGRegEx < 16)
5765 {
5766 if (idxGstTmpReg >= 8)
5767 pbCodeBuf[off++] = X86_OP_REX_B;
5768 else if (idxGstTmpReg >= 4)
5769 pbCodeBuf[off++] = X86_OP_REX;
5770 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5771 pbCodeBuf[off++] = u8Value;
5772 }
5773 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
5774 else if (idxGstTmpReg < 4)
5775 {
5776 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
5777 pbCodeBuf[off++] = u8Value;
5778 }
5779 else
5780 {
5781 /* ror reg64, 8 */
5782 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5783 pbCodeBuf[off++] = 0xc1;
5784 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5785 pbCodeBuf[off++] = 8;
5786
5787 /* mov reg8, imm8 */
5788 if (idxGstTmpReg >= 8)
5789 pbCodeBuf[off++] = X86_OP_REX_B;
5790 else if (idxGstTmpReg >= 4)
5791 pbCodeBuf[off++] = X86_OP_REX;
5792 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5793 pbCodeBuf[off++] = u8Value;
5794
5795 /* rol reg64, 8 */
5796 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5797 pbCodeBuf[off++] = 0xc1;
5798 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5799 pbCodeBuf[off++] = 8;
5800 }
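 /* E.g. with the guest value shadowed in host r8 the above emits roughly:
    49 C1 C8 08 (ror r8, 8), 41 B0 ii (mov r8b, imm8), 49 C1 C0 08 (rol r8, 8)
    - an illustration only, the exact bytes depend on idxGstTmpReg. */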
5801
5802#elif defined(RT_ARCH_ARM64)
5803 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
5804 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5805 if (iGRegEx < 16)
5806 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
5807 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
5808 else
5809 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
5810 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
5811 iemNativeRegFreeTmp(pReNative, idxImmReg);
5812
5813#else
5814# error "Port me!"
5815#endif
5816
5817 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5818
5819 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
5820
5821 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5822 return off;
5823}
5824
5825
5826/*
5827 * General purpose register manipulation (add, sub).
5828 */
5829
5830#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5831 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5832
5833/** Emits code for IEM_MC_SUB_GREG_U16. */
5834DECL_INLINE_THROW(uint32_t)
5835iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5836{
5837 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5838 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5839 kIemNativeGstRegUse_ForUpdate);
5840
5841#ifdef RT_ARCH_AMD64
5842 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6); /* worst case: 0x66 + REX + 0x81 + modrm + imm16 */
5843 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5844 if (idxGstTmpReg >= 8)
5845 pbCodeBuf[off++] = X86_OP_REX_B;
5846 if (uSubtrahend == 1)
5847 {
5848 pbCodeBuf[off++] = 0xff; /* dec */
5849 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5850 }
5851 else
5852 {
5853 pbCodeBuf[off++] = 0x81;
5854 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5855 pbCodeBuf[off++] = uSubtrahend;
5856 pbCodeBuf[off++] = 0;
5857 }
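 /* E.g. with the guest register in host ecx and uSubtrahend=2 this emits
    66 81 E9 02 00 (sub cx, 2) - illustration only, assuming no REX is needed. */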
5858
5859#else
5860 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5861 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5862
5863 /* sub tmp, gstgrp, uSubtrahend */
5864 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5865
5866 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5867 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5868
5869 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5870#endif
5871
5872 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5873
5874 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5875
5876 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5877 return off;
5878}
5879
5880
5881#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5882 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5883
5884#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5885 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5886
5887/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5888DECL_INLINE_THROW(uint32_t)
5889iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5890{
5891 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5892 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5893 kIemNativeGstRegUse_ForUpdate);
5894
5895#ifdef RT_ARCH_AMD64
5896 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7); /* worst case: REX + 0x81 + modrm + imm32 */
5897 if (f64Bit)
5898 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5899 else if (idxGstTmpReg >= 8)
5900 pbCodeBuf[off++] = X86_OP_REX_B;
5901 if (uSubtrahend == 1)
5902 {
5903 /* dec */
5904 pbCodeBuf[off++] = 0xff;
5905 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5906 }
5907 else if (uSubtrahend < 128)
5908 {
5909 pbCodeBuf[off++] = 0x83; /* sub */
5910 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5911 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5912 }
5913 else
5914 {
5915 pbCodeBuf[off++] = 0x81; /* sub */
5916 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5917 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5918 pbCodeBuf[off++] = 0;
5919 pbCodeBuf[off++] = 0;
5920 pbCodeBuf[off++] = 0;
5921 }
5922
5923#else
5924 /* sub tmp, gstgrp, uSubtrahend */
5925 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5926 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5927
5928#endif
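 /* Note: in the 32-bit case both the AMD64 and ARM64 forms above clear the upper half
    of the host register, matching x86 semantics for 32-bit GPR writes, so storing the
    full 64 bits below is correct. */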
5929
5930 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5931
5932 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5933
5934 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5935 return off;
5936}
5937
5938
5939
5940/*********************************************************************************************************************************
5941* Builtin functions *
5942*********************************************************************************************************************************/
5943
5944/**
5945 * Built-in function that calls a C-implementation function taking zero arguments.
5946 */
5947static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
5948{
5949 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
5950 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
5951 /** @todo Drop this crap hack?
5952 * We don't have the flush mask here so we must pass UINT64_MAX. */
5953 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, UINT64_MAX, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
5954}
5955
5956
5957/**
5958 * Built-in function that checks for pending interrupts that can be delivered or
5959 * forced action flags.
5960 *
5961 * This triggers after the completion of an instruction, so EIP is already at
5962 * the next instruction. If an IRQ or important FF is pending, this will return
5963 * a non-zero status that stops TB execution.
5964 */
5965static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
5966{
5967 RT_NOREF(pCallEntry);
5968
5969 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
5970 and I'm too lazy to create a 'Fixed' version of that one. */
5971 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
5972 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
5973
5974 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
5975
5976 /* Again, we need to load the extended EFLAGS before we actually need them
5977 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
5978 loaded them inside the check, as the shadow state would not be correct
5979 when the code branches before the load. Ditto PC. */
5980 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5981 kIemNativeGstRegUse_ReadOnly);
5982
5983 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
5984
5985 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5986
5987 /*
5988 * Start by checking the local forced actions of the EMT we're on for IRQs
5989 * and other FFs that need servicing.
5990 */
5991 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
5992 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
5993 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
5994 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5995 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
5996 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
5997 | VMCPU_FF_TLB_FLUSH
5998 | VMCPU_FF_UNHALT ),
5999 true /*fSetFlags*/);
6000 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
6001 uint32_t const offFixupJumpToVmCheck1 = off;
6002 off = iemNativeEmitJzToFixed(pReNative, off, 0);
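 /* The 0 above is only a placeholder displacement; the jump gets patched to point at
    the VM-check code via iemNativeFixupFixedJump() once that location is known below. */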
6003
6004 /* Some relevant FFs are set, but if it's only the APIC and/or PIC being set,
6005 they may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
6006 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
6007 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
6008 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
6009 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6010
6011 /* So, it's only interrupt related FFs and we need to see if IRQs are being
6012 suppressed by the CPU or not. */
6013 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
6014 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
6015 idxLabelReturnBreak);
6016
6017 /* We've got shadow flags set, so we must check that the PC they are valid
6018 for matches our current PC value. */
6019 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
6020 * a register. */
6021 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
6022 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
6023
6024 /*
6025 * Now check the force flags of the VM.
6026 */
6027 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
6028 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
6029 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
6030 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
6031 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
6032 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6033
6034 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
6035
6036 /*
6037 * We're good, no IRQs or FFs pending.
6038 */
6039 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6040 iemNativeRegFreeTmp(pReNative, idxEflReg);
6041 iemNativeRegFreeTmp(pReNative, idxPcReg);
6042
6043 return off;
6044}
6045
6046
6047/**
6048 * Built-in function checks if IEMCPU::fExec has the expected value.
6049 */
6050static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
6051{
6052 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
6053 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6054
6055 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6056 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
6057 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
6058 kIemNativeLabelType_ReturnBreak);
6059 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6060 return off;
6061}
6062
6063
6064
6065/*********************************************************************************************************************************
6066* The native code generator functions for each MC block. *
6067*********************************************************************************************************************************/
6068
6069
6070/*
6071 * Include g_apfnIemNativeRecompileFunctions and associated functions.
6072 *
6073 * This should probably live in its own file later, but let's see what the
6074 * compile times turn out to be first.
6075 */
6076#include "IEMNativeFunctions.cpp.h"
6077
6078
6079
6080/*********************************************************************************************************************************
6081* Recompiler Core. *
6082*********************************************************************************************************************************/
6083
6084
6085/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
6086static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
6087{
6088 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
6089 pDis->cbCachedInstr += cbMaxRead;
6090 RT_NOREF(cbMinRead);
6091 return VERR_NO_DATA;
6092}
6093
6094
6095/**
6096 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
6097 * @returns pszBuf.
6098 * @param fFlags The flags.
6099 * @param pszBuf The output buffer.
6100 * @param cbBuf The output buffer size. At least 32 bytes.
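 *
 * Example output (illustrative only): "32BIT_FLAT CPL0 TYPE_NATIVE".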
6101 */
6102DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
6103{
6104 Assert(cbBuf >= 32);
6105 static RTSTRTUPLE const s_aModes[] =
6106 {
6107 /* [00] = */ { RT_STR_TUPLE("16BIT") },
6108 /* [01] = */ { RT_STR_TUPLE("32BIT") },
6109 /* [02] = */ { RT_STR_TUPLE("!2!") },
6110 /* [03] = */ { RT_STR_TUPLE("!3!") },
6111 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
6112 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
6113 /* [06] = */ { RT_STR_TUPLE("!6!") },
6114 /* [07] = */ { RT_STR_TUPLE("!7!") },
6115 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
6116 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
6117 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
6118 /* [0b] = */ { RT_STR_TUPLE("!b!") },
6119 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
6120 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
6121 /* [0e] = */ { RT_STR_TUPLE("!e!") },
6122 /* [0f] = */ { RT_STR_TUPLE("!f!") },
6123 /* [10] = */ { RT_STR_TUPLE("!10!") },
6124 /* [11] = */ { RT_STR_TUPLE("!11!") },
6125 /* [12] = */ { RT_STR_TUPLE("!12!") },
6126 /* [13] = */ { RT_STR_TUPLE("!13!") },
6127 /* [14] = */ { RT_STR_TUPLE("!14!") },
6128 /* [15] = */ { RT_STR_TUPLE("!15!") },
6129 /* [16] = */ { RT_STR_TUPLE("!16!") },
6130 /* [17] = */ { RT_STR_TUPLE("!17!") },
6131 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
6132 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
6133 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
6134 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
6135 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
6136 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
6137 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
6138 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
6139 };
6140 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
6141 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
6142 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
6143
6144 pszBuf[off++] = ' ';
6145 pszBuf[off++] = 'C';
6146 pszBuf[off++] = 'P';
6147 pszBuf[off++] = 'L';
6148 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
6149 Assert(off < 32);
6150
6151 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
6152
6153 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
6154 {
6155 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
6156 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
6157 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
6158 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
6159 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
6160 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
6161 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
6162 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
6163 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
6164 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
6165 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
6166 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
6167 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
6168 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
6169 };
6170 if (fFlags)
6171 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
6172 if (s_aFlags[i].fFlag & fFlags)
6173 {
6174 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
6175 pszBuf[off++] = ' ';
6176 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
6177 off += s_aFlags[i].cchName;
6178 fFlags &= ~s_aFlags[i].fFlag;
6179 if (!fFlags)
6180 break;
6181 }
6182 pszBuf[off] = '\0';
6183
6184 return pszBuf;
6185}
6186
6187
6188DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
6189{
6190 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
6191
6192 char szDisBuf[512];
6193 DISSTATE Dis;
6194 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
6195 uint32_t const cNative = pTb->Native.cInstructions;
6196 uint32_t offNative = 0;
6197#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6198 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
6199#endif
6200 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6201 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6202 : DISCPUMODE_64BIT;
6203#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6204 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
6205#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6206 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
6207#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6208# error "Port me"
6209#else
6210 csh hDisasm = ~(size_t)0;
6211# if defined(RT_ARCH_AMD64)
6212 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
6213# elif defined(RT_ARCH_ARM64)
6214 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
6215# else
6216# error "Port me"
6217# endif
6218 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
6219#endif
6220
6221 /*
6222 * Print TB info.
6223 */
6224 pHlp->pfnPrintf(pHlp,
6225 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
6226 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
6227 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
6228 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
6229#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6230 if (pDbgInfo && pDbgInfo->cEntries > 1)
6231 {
6232 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
6233
6234 /*
6235 * This disassembly is driven by the debug info which follows the native
6236 * code and indicates when it starts with the next guest instructions,
6237 * code and indicates where the code for the next guest instruction starts,
6238 * where labels are, and such things.
6239 uint32_t idxThreadedCall = 0;
6240 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
6241 uint8_t idxRange = UINT8_MAX;
6242 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
6243 uint32_t offRange = 0;
6244 uint32_t offOpcodes = 0;
6245 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
6246 uint32_t const cDbgEntries = pDbgInfo->cEntries;
6247 uint32_t iDbgEntry = 1;
6248 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
6249
6250 while (offNative < cNative)
6251 {
6252 /* If we're at or have passed the point where the next chunk of debug
6253 info starts, process it. */
6254 if (offDbgNativeNext <= offNative)
6255 {
6256 offDbgNativeNext = UINT32_MAX;
6257 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
6258 {
6259 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
6260 {
6261 case kIemTbDbgEntryType_GuestInstruction:
6262 {
6263 /* Did the exec flag change? */
6264 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
6265 {
6266 pHlp->pfnPrintf(pHlp,
6267 " fExec change %#08x -> %#08x %s\n",
6268 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6269 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6270 szDisBuf, sizeof(szDisBuf)));
6271 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
6272 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6273 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6274 : DISCPUMODE_64BIT;
6275 }
6276
6277 /* New opcode range? We need to handle a spurious debug info entry here for cases
6278 where the compilation was aborted before the opcode was recorded and the actual
6279 instruction was translated to a threaded call. This may happen when we run out
6280 of ranges, or when some complicated interrupts/FFs are found to be pending or
6281 similar. So, we just deal with it here rather than in the compiler code as it
6282 is a lot simpler to do up here. */
6283 if ( idxRange == UINT8_MAX
6284 || idxRange >= cRanges
6285 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
6286 {
6287 idxRange += 1;
6288 if (idxRange < cRanges)
6289 offRange = 0;
6290 else
6291 continue;
6292 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
6293 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
6294 + (pTb->aRanges[idxRange].idxPhysPage == 0
6295 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6296 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
6297 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6298 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
6299 pTb->aRanges[idxRange].idxPhysPage);
6300 }
6301
6302 /* Disassemble the instruction. */
6303 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
6304 uint32_t cbInstr = 1;
6305 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6306 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
6307 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6308 if (RT_SUCCESS(rc))
6309 {
6310 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6311 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6312 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6313 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6314
6315 static unsigned const s_offMarker = 55;
6316 static char const s_szMarker[] = " ; <--- guest";
6317 if (cch < s_offMarker)
6318 {
6319 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
6320 cch = s_offMarker;
6321 }
6322 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
6323 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
6324
6325 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
6326 }
6327 else
6328 {
6329 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
6330 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
6331 cbInstr = 1;
6332 }
6333 GCPhysPc += cbInstr;
6334 offOpcodes += cbInstr;
6335 offRange += cbInstr;
6336 continue;
6337 }
6338
6339 case kIemTbDbgEntryType_ThreadedCall:
6340 pHlp->pfnPrintf(pHlp,
6341 " Call #%u to %s (%u args)%s\n",
6342 idxThreadedCall,
6343 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6344 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6345 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
6346 idxThreadedCall++;
6347 continue;
6348
6349 case kIemTbDbgEntryType_GuestRegShadowing:
6350 {
6351 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
6352 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
6353 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
6354 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
6355 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6356 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
6357 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
6358 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
6359 else
6360 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
6361 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
6362 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6363 continue;
6364 }
6365
6366 case kIemTbDbgEntryType_Label:
6367 {
6368 const char *pszName = "what_the_fudge";
6369 const char *pszComment = "";
6370 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
6371 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
6372 {
6373 case kIemNativeLabelType_Return:
6374 pszName = "Return";
6375 break;
6376 case kIemNativeLabelType_ReturnBreak:
6377 pszName = "ReturnBreak";
6378 break;
6379 case kIemNativeLabelType_ReturnWithFlags:
6380 pszName = "ReturnWithFlags";
6381 break;
6382 case kIemNativeLabelType_NonZeroRetOrPassUp:
6383 pszName = "NonZeroRetOrPassUp";
6384 break;
6385 case kIemNativeLabelType_RaiseGp0:
6386 pszName = "RaiseGp0";
6387 break;
6388 case kIemNativeLabelType_If:
6389 pszName = "If";
6390 fNumbered = true;
6391 break;
6392 case kIemNativeLabelType_Else:
6393 pszName = "Else";
6394 fNumbered = true;
6395 pszComment = " ; regs state restored pre-if-block";
6396 break;
6397 case kIemNativeLabelType_Endif:
6398 pszName = "Endif";
6399 fNumbered = true;
6400 break;
6401 case kIemNativeLabelType_CheckIrq:
6402 pszName = "CheckIrq_CheckVM";
6403 fNumbered = true;
6404 break;
6405 case kIemNativeLabelType_Invalid:
6406 case kIemNativeLabelType_End:
6407 break;
6408 }
6409 if (fNumbered)
6410 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
6411 else
6412 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
6413 continue;
6414 }
6415
6416 case kIemTbDbgEntryType_NativeOffset:
6417 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
6418 Assert(offDbgNativeNext > offNative);
6419 break;
6420
6421 default:
6422 AssertFailed();
6423 }
6424 iDbgEntry++;
6425 break;
6426 }
6427 }
6428
6429 /*
6430 * Disassemble the next native instruction.
6431 */
6432 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6433# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6434 uint32_t cbInstr = sizeof(paNative[0]);
6435 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6436 if (RT_SUCCESS(rc))
6437 {
6438# if defined(RT_ARCH_AMD64)
6439 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6440 {
6441 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6442 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6443 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
6444 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6445 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6446 uInfo & 0x8000 ? " - recompiled" : "");
6447 else
6448 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6449 }
6450 else
6451# endif
6452 {
6453# ifdef RT_ARCH_AMD64
6454 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6455 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6456 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6457 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6458# elif defined(RT_ARCH_ARM64)
6459 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6460 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6461 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6462# else
6463# error "Port me"
6464# endif
6465 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6466 }
6467 }
6468 else
6469 {
6470# if defined(RT_ARCH_AMD64)
6471 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6472 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6473# elif defined(RT_ARCH_ARM64)
6474 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6475# else
6476# error "Port me"
6477# endif
6478 cbInstr = sizeof(paNative[0]);
6479 }
6480 offNative += cbInstr / sizeof(paNative[0]);
6481
6482# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6483 cs_insn *pInstr;
6484 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6485 (uintptr_t)pNativeCur, 1, &pInstr);
6486 if (cInstrs > 0)
6487 {
6488 Assert(cInstrs == 1);
6489# if defined(RT_ARCH_AMD64)
6490 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6491 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6492# else
6493 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6494 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6495# endif
6496 offNative += pInstr->size / sizeof(*pNativeCur);
6497 cs_free(pInstr, cInstrs);
6498 }
6499 else
6500 {
6501# if defined(RT_ARCH_AMD64)
6502 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6503 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6504# else
6505 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6506# endif
6507 offNative++;
6508 }
6509# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6510 }
6511 }
6512 else
6513#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
6514 {
6515 /*
6516 * No debug info, just disassemble the x86 code and then the native code.
6517 *
6518 * First the guest code:
6519 */
6520 for (unsigned i = 0; i < pTb->cRanges; i++)
6521 {
6522 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
6523 + (pTb->aRanges[i].idxPhysPage == 0
6524 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6525 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
6526 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6527 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
6528 unsigned off = pTb->aRanges[i].offOpcodes;
6529 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
6530 while (off < cbOpcodes)
6531 {
6532 uint32_t cbInstr = 1;
6533 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6534 &pTb->pabOpcodes[off], cbOpcodes - off,
6535 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6536 if (RT_SUCCESS(rc))
6537 {
6538 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6539 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6540 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6541 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6542 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
6543 GCPhysPc += cbInstr;
6544 off += cbInstr;
6545 }
6546 else
6547 {
6548 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
6549 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
6550 break;
6551 }
6552 }
6553 }
6554
6555 /*
6556 * Then the native code:
6557 */
6558 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
6559 while (offNative < cNative)
6560 {
6561 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6562# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6563 uint32_t cbInstr = sizeof(paNative[0]);
6564 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6565 if (RT_SUCCESS(rc))
6566 {
6567# if defined(RT_ARCH_AMD64)
6568 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6569 {
6570 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6571 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6572 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
6573 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6574 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6575 uInfo & 0x8000 ? " - recompiled" : "");
6576 else
6577 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6578 }
6579 else
6580# endif
6581 {
6582# ifdef RT_ARCH_AMD64
6583 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6584 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6585 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6586 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6587# elif defined(RT_ARCH_ARM64)
6588 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6589 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6590 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6591# else
6592# error "Port me"
6593# endif
6594 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6595 }
6596 }
6597 else
6598 {
6599# if defined(RT_ARCH_AMD64)
6600 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6601 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6602# else
6603 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6604# endif
6605 cbInstr = sizeof(paNative[0]);
6606 }
6607 offNative += cbInstr / sizeof(paNative[0]);
6608
6609# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6610 cs_insn *pInstr;
6611 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6612 (uintptr_t)pNativeCur, 1, &pInstr);
6613 if (cInstrs > 0)
6614 {
6615 Assert(cInstrs == 1);
6616# if defined(RT_ARCH_AMD64)
6617 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6618 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6619# else
6620 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6621 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6622# endif
6623 offNative += pInstr->size / sizeof(*pNativeCur);
6624 cs_free(pInstr, cInstrs);
6625 }
6626 else
6627 {
6628# if defined(RT_ARCH_AMD64)
6629 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6630 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6631# else
6632 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6633# endif
6634 offNative++;
6635 }
6636# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6637 }
6638 }
6639
6640#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6641 /* Cleanup. */
6642 cs_close(&hDisasm);
6643#endif
6644}
6645
6646
6647/**
6648 * Recompiles the given threaded TB into a native one.
6649 *
6650 * In case of failure the translation block will be returned as-is.
6651 *
6652 * @returns pTb.
6653 * @param pVCpu The cross context virtual CPU structure of the calling
6654 * thread.
6655 * @param pTb The threaded translation block to recompile to native.
6656 */
6657DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
6658{
6659 /*
6660 * The first time thru, we allocate the recompiler state, the other times
6661 * we just need to reset it before using it again.
6662 */
6663 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
6664 if (RT_LIKELY(pReNative))
6665 iemNativeReInit(pReNative, pTb);
6666 else
6667 {
6668 pReNative = iemNativeInit(pVCpu, pTb);
6669 AssertReturn(pReNative, pTb);
6670 }
6671
6672 /*
6673 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
6674 * for aborting if an error happens.
6675 */
6676 uint32_t cCallsLeft = pTb->Thrd.cCalls;
6677#ifdef LOG_ENABLED
6678 uint32_t const cCallsOrg = cCallsLeft;
6679#endif
6680 uint32_t off = 0;
6681 int rc = VINF_SUCCESS;
6682 IEMNATIVE_TRY_SETJMP(pReNative, rc)
6683 {
6684 /*
6685 * Emit prolog code (fixed).
6686 */
6687 off = iemNativeEmitProlog(pReNative, off);
6688
6689 /*
6690 * Convert the calls to native code.
6691 */
6692#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6693 int32_t iGstInstr = -1;
6694 uint32_t fExec = pTb->fFlags;
6695#endif
6696 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
6697 while (cCallsLeft-- > 0)
6698 {
6699 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
6700
6701 /*
6702 * Debug info and assembly markup.
6703 */
6704#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6705 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
6706 fExec = pCallEntry->auParams[0];
6707 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6708 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
6709 {
6710 if (iGstInstr < (int32_t)pTb->cInstructions)
6711 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
6712 else
6713 Assert(iGstInstr == pTb->cInstructions);
6714 iGstInstr = pCallEntry->idxInstr;
6715 }
6716 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
6717#endif
6718#if defined(VBOX_STRICT) && 1
6719 off = iemNativeEmitMarker(pReNative, off,
6720 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
6721 pCallEntry->enmFunction));
6722#endif
6723
6724 /*
6725 * Actual work.
6726 */
6727 if (pfnRecom) /** @todo stats on this. */
6728 {
6729 //STAM_COUNTER_INC()
6730 off = pfnRecom(pReNative, off, pCallEntry);
6731 }
6732 else
6733 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
6734 Assert(off <= pReNative->cInstrBufAlloc);
6735 Assert(pReNative->cCondDepth == 0);
6736
6737 /*
6738 * Advance.
6739 */
6740 pCallEntry++;
6741 }
6742
6743 /*
6744 * Emit the epilog code.
6745 */
6746 uint32_t idxReturnLabel;
6747 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
6748
6749 /*
6750 * Generate special jump labels.
6751 */
6752 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
6753 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
6754 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
6755 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
6756 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
6757 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
6758 }
6759 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
6760 {
6761 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
6762 return pTb;
6763 }
6764 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
6765 Assert(off <= pReNative->cInstrBufAlloc);
6766
6767 /*
6768 * Make sure all labels have been defined.
6769 */
6770 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
6771#ifdef VBOX_STRICT
6772 uint32_t const cLabels = pReNative->cLabels;
6773 for (uint32_t i = 0; i < cLabels; i++)
6774 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
6775#endif
6776
6777 /*
6778 * Allocate executable memory, copy over the code we've generated.
6779 */
6780 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
6781 if (pTbAllocator->pDelayedFreeHead)
6782 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
6783
6784 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
6785 AssertReturn(paFinalInstrBuf, pTb);
6786 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
6787
6788 /*
6789 * Apply fixups.
6790 */
6791 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
6792 uint32_t const cFixups = pReNative->cFixups;
6793 for (uint32_t i = 0; i < cFixups; i++)
6794 {
6795 Assert(paFixups[i].off < off);
6796 Assert(paFixups[i].idxLabel < cLabels);
6797 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
6798 switch (paFixups[i].enmType)
6799 {
6800#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6801 case kIemNativeFixupType_Rel32:
6802 Assert(paFixups[i].off + 4 <= off);
6803 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6804 continue;
6805
6806#elif defined(RT_ARCH_ARM64)
6807 case kIemNativeFixupType_RelImm26At0:
6808 {
6809 Assert(paFixups[i].off < off);
6810 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6811 Assert(offDisp >= -262144 && offDisp < 262144);
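 /* Note: the range assert looks conservative; a 26-bit branch immediate actually
    reaches +/- 2^25 instructions (128 MB), but TBs never get anywhere near that,
    so the tighter check is harmless. */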
6812 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6813 continue;
6814 }
6815
6816 case kIemNativeFixupType_RelImm19At5:
6817 {
6818 Assert(paFixups[i].off < off);
6819 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6820 Assert(offDisp >= -262144 && offDisp < 262144);
6821 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6822 continue;
6823 }
6824
6825 case kIemNativeFixupType_RelImm14At5:
6826 {
6827 Assert(paFixups[i].off < off);
6828 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6829 Assert(offDisp >= -8192 && offDisp < 8192);
6830 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
6831 continue;
6832 }
6833
6834#endif
6835 case kIemNativeFixupType_Invalid:
6836 case kIemNativeFixupType_End:
6837 break;
6838 }
6839 AssertFailed();
6840 }
6841
6842 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
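 /* Presumably this flushes the instruction cache and/or restores execute protection
    for the freshly written code before it can be run (assumption, not verified here). */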
6843
6844 /*
6845 * Convert the translation block.
6846 */
6847 //RT_BREAKPOINT();
6848 RTMemFree(pTb->Thrd.paCalls);
6849 pTb->Native.paInstructions = paFinalInstrBuf;
6850 pTb->Native.cInstructions = off;
6851 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
6852#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6853 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
6854 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
6855#endif
6856
6857 Assert(pTbAllocator->cThreadedTbs > 0);
6858 pTbAllocator->cThreadedTbs -= 1;
6859 pTbAllocator->cNativeTbs += 1;
6860 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
6861
6862#ifdef LOG_ENABLED
6863 /*
6864 * Disassemble to the log if enabled.
6865 */
6866 if (LogIs3Enabled())
6867 {
6868 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
6869 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
6870 }
6871#endif
6872
6873 return pTb;
6874}
6875