1/* $Id: IEMAllN8veRecompiler.cpp 101908 2023-11-06 21:57:56Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMNativeFunctions.h"
102
103
104/*
105 * Narrow down the configs here to avoid wasting time on unused configs.
106 * Note! Same checks in IEMAllThrdRecompiler.cpp.
107 */
108
109#ifndef IEM_WITH_CODE_TLB
110# error The code TLB must be enabled for the recompiler.
111#endif
112
113#ifndef IEM_WITH_DATA_TLB
114# error The data TLB must be enabled for the recompiler.
115#endif
116
117#ifndef IEM_WITH_SETJMP
118# error The setjmp approach must be enabled for the recompiler.
119#endif
120
121/** @todo eliminate this clang build hack. */
122#if RT_CLANG_PREREQ(4, 0)
123# pragma GCC diagnostic ignored "-Wunused-function"
124#endif
125
126
127
128/*********************************************************************************************************************************
129* Defined Constants And Macros *
130*********************************************************************************************************************************/
131/** Always count instructions for now. */
132#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
133
134
135/*********************************************************************************************************************************
136* Internal Functions *
137*********************************************************************************************************************************/
138#ifdef VBOX_STRICT
139static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
140 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
141#endif
142#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
143static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
144static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
145#endif
146
147
148/*********************************************************************************************************************************
149* Executable Memory Allocator *
150*********************************************************************************************************************************/
151/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
152 * Use an alternative chunk sub-allocator that does not store internal data
153 * in the chunk.
154 *
155 * Using RTHeapSimple is not practical on newer darwin systems where
156 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
157 * memory. We would have to change the protection of the whole chunk for
158 * every call to RTHeapSimple, which would be rather expensive.
159 *
160 * This alternative implementation lets us restrict page protection modifications
161 * to the pages backing the executable memory we just allocated.
162 */
163#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
164/** The chunk sub-allocation unit size in bytes. */
165#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
166/** The chunk sub-allocation unit size as a shift factor. */
167#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
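/* Illustration: a cbReq byte request is rounded up to whole 128-byte units,
   i.e. cReqUnits = (cbReq + 127) >> 7, so a 1000 byte translation block
   occupies 8 units (1024 bytes) and each 64-bit word of the allocation
   bitmap covers 64 units = 8 KB of chunk space. */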
168
169#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
170# ifdef IEMNATIVE_USE_GDB_JIT
171# define IEMNATIVE_USE_GDB_JIT_ET_DYN
172
173/** GDB JIT: Code entry. */
174typedef struct GDBJITCODEENTRY
175{
176 struct GDBJITCODEENTRY *pNext;
177 struct GDBJITCODEENTRY *pPrev;
178 uint8_t *pbSymFile;
179 uint64_t cbSymFile;
180} GDBJITCODEENTRY;
181
182/** GDB JIT: Actions. */
183typedef enum GDBJITACTIONS : uint32_t
184{
185 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
186} GDBJITACTIONS;
187
188/** GDB JIT: Descriptor. */
189typedef struct GDBJITDESCRIPTOR
190{
191 uint32_t uVersion;
192 GDBJITACTIONS enmAction;
193 GDBJITCODEENTRY *pRelevant;
194 GDBJITCODEENTRY *pHead;
195 /** Our addition: */
196 GDBJITCODEENTRY *pTail;
197} GDBJITDESCRIPTOR;
198
199/** GDB JIT: Our simple symbol file data. */
200typedef struct GDBJITSYMFILE
201{
202 Elf64_Ehdr EHdr;
203# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
204 Elf64_Shdr aShdrs[5];
205# else
206 Elf64_Shdr aShdrs[7];
207 Elf64_Phdr aPhdrs[2];
208# endif
209 /** The dwarf ehframe data for the chunk. */
210 uint8_t abEhFrame[512];
211 char szzStrTab[128];
212 Elf64_Sym aSymbols[3];
213# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
214 Elf64_Sym aDynSyms[2];
215 Elf64_Dyn aDyn[6];
216# endif
217} GDBJITSYMFILE;
218
219extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
220extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
221
222/** Init once for g_IemNativeGdbJitLock. */
223static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
224/** Init once for the critical section. */
225static RTCRITSECT g_IemNativeGdbJitLock;
226
227/** GDB reads the info here. */
228GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
229
230/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
231DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
232{
233 ASMNopPause();
234}
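/* Registration sequence (see iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk
   below): the new GDBJITCODEENTRY is linked onto the pHead/pTail list, pRelevant
   is pointed at it, enmAction is set to kGdbJitaction_Register and
   __jit_debug_register_code() is called, so a debugger with a breakpoint on that
   function can pick up the new symbol file. */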
235
236/** @callback_method_impl{FNRTONCE} */
237static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
238{
239 RT_NOREF(pvUser);
240 return RTCritSectInit(&g_IemNativeGdbJitLock);
241}
242
243
244# endif /* IEMNATIVE_USE_GDB_JIT */
245
246/**
247 * Per-chunk unwind info for non-windows hosts.
248 */
249typedef struct IEMEXECMEMCHUNKEHFRAME
250{
251# ifdef IEMNATIVE_USE_LIBUNWIND
252 /** The offset of the FDA into abEhFrame. */
253 uintptr_t offFda;
254# else
255 /** 'struct object' storage area. */
256 uint8_t abObject[1024];
257# endif
258# ifdef IEMNATIVE_USE_GDB_JIT
259# if 0
260 /** The GDB JIT 'symbol file' data. */
261 GDBJITSYMFILE GdbJitSymFile;
262# endif
263 /** The GDB JIT list entry. */
264 GDBJITCODEENTRY GdbJitEntry;
265# endif
266 /** The dwarf ehframe data for the chunk. */
267 uint8_t abEhFrame[512];
268} IEMEXECMEMCHUNKEHFRAME;
270/** Pointer to per-chunk unwind info for non-windows hosts. */
270typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
271#endif
272
273
274/**
275 * A chunk of executable memory.
276 */
277typedef struct IEMEXECMEMCHUNK
278{
279#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
280 /** Number of free items in this chunk. */
281 uint32_t cFreeUnits;
283 /** Hint where to start searching for free space in the allocation bitmap. */
283 uint32_t idxFreeHint;
284#else
285 /** The heap handle. */
286 RTHEAPSIMPLE hHeap;
287#endif
288 /** Pointer to the chunk. */
289 void *pvChunk;
290#ifdef IN_RING3
291 /**
292 * Pointer to the unwind information.
293 *
294 * This is used during C++ throw and longjmp (windows and probably most other
296 * platforms). Some debuggers (windbg) make use of it as well.
296 *
297 * Windows: This is allocated from hHeap on windows because (at least for
298 * AMD64) the UNWIND_INFO structure address in the
299 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
300 *
301 * Others: Allocated from the regular heap to avoid unnecessary executable data
302 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
303 void *pvUnwindInfo;
304#elif defined(IN_RING0)
305 /** Allocation handle. */
306 RTR0MEMOBJ hMemObj;
307#endif
308} IEMEXECMEMCHUNK;
309/** Pointer to a memory chunk. */
310typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
311
312
313/**
314 * Executable memory allocator for the native recompiler.
315 */
316typedef struct IEMEXECMEMALLOCATOR
317{
318 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
319 uint32_t uMagic;
320
321 /** The chunk size. */
322 uint32_t cbChunk;
323 /** The maximum number of chunks. */
324 uint32_t cMaxChunks;
325 /** The current number of chunks. */
326 uint32_t cChunks;
327 /** Hint where to start looking for available memory. */
328 uint32_t idxChunkHint;
329 /** Statistics: Current number of allocations. */
330 uint32_t cAllocations;
331
332 /** The total amount of memory available. */
333 uint64_t cbTotal;
334 /** Total amount of free memory. */
335 uint64_t cbFree;
336 /** Total amount of memory allocated. */
337 uint64_t cbAllocated;
338
339#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
340 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
341 *
342 * Since the chunk size is a power of two and the minimum chunk size is a lot
343 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
344 * require a whole number of uint64_t elements in the allocation bitmap. So,
345 * for the sake of simplicity/laziness, they are allocated as one
346 * contiguous block. */
347 uint64_t *pbmAlloc;
348 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
349 uint32_t cUnitsPerChunk;
350 /** Number of bitmap elements per chunk (for quickly locating the bitmap
351 * portion corresponding to a chunk). */
352 uint32_t cBitmapElementsPerChunk;
353#else
354 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
355 * @{ */
356 /** The size of the heap internal block header. This is used to adjust the
357 * request memory size to make sure there is exactly enough room for a header at
358 * the end of the blocks we allocate before the next 64 byte alignment line. */
359 uint32_t cbHeapBlockHdr;
360 /** The size of the initial heap allocation required to make sure the first
361 * allocation is correctly aligned. */
362 uint32_t cbHeapAlignTweak;
363 /** The alignment tweak allocation address. */
364 void *pvAlignTweak;
365 /** @} */
366#endif
367
368#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
369 /** Pointer to the array of unwind info running parallel to aChunks (same
370 * allocation as this structure, located after the bitmaps).
371 * (For Windows, the structures must reside in 32-bit RVA distance to the
372 * actual chunk, so they are allocated off the chunk.) */
373 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
374#endif
375
376 /** The allocation chunks. */
377 RT_FLEXIBLE_ARRAY_EXTENSION
378 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
379} IEMEXECMEMALLOCATOR;
380/** Pointer to an executable memory allocator. */
381typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
382
383/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
384#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
385
386
387static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
388
389
390/**
391 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
392 * the heap statistics.
393 */
394static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
395 uint32_t cbReq, uint32_t idxChunk)
396{
397 pExecMemAllocator->cAllocations += 1;
398 pExecMemAllocator->cbAllocated += cbReq;
399#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
400 pExecMemAllocator->cbFree -= cbReq;
401#else
402 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
403#endif
404 pExecMemAllocator->idxChunkHint = idxChunk;
405
406#ifdef RT_OS_DARWIN
407 /*
408 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
409 * on darwin. So, we mark the pages returned as read+write after alloc and
410 * expect the caller to call iemExecMemAllocatorReadyForUse when done
411 * writing to the allocation.
412 *
413 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
414 * for details.
415 */
416 /** @todo detect if this is necessary... it wasn't required on 10.15 or
417 * whatever older version it was. */
418 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
419 AssertRC(rc);
420#endif
421
422 return pvRet;
423}
424
425
426#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
427static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
428 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
429{
430 /*
431 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
432 */
433 Assert(!(cToScan & 63));
434 Assert(!(idxFirst & 63));
435 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
436 pbmAlloc += idxFirst / 64;
437
438 /*
439 * Scan the bitmap for cReqUnits of consecutive clear bits
440 */
441 /** @todo This can probably be done more efficiently for non-x86 systems. */
442 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
443 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
444 {
445 uint32_t idxAddBit = 1;
446 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
447 idxAddBit++;
448 if (idxAddBit >= cReqUnits)
449 {
450 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
451
452 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
453 pChunk->cFreeUnits -= cReqUnits;
454 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
455
456 void * const pvRet = (uint8_t *)pChunk->pvChunk
457 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
458
459 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
460 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
461 }
462
463 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
464 }
465 return NULL;
466}
467#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
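/* Example of the scan above: an 8 unit (1 KB) request locates the first clear
   bit with ASMBitFirstClear, verifies that the following 7 bits are clear as
   well, marks all 8 bits as allocated and returns
   pvChunk + ((idxFirst + iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT); if a
   set bit is hit instead, the scan resumes at the next clear bit after it. */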
468
469
470static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
471{
472#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
473 /*
474 * Figure out how much to allocate.
475 */
476 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
477 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
478 {
479 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
480 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
481 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
482 {
483 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
484 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
485 if (pvRet)
486 return pvRet;
487 }
488 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
489 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
490 cReqUnits, idxChunk);
491 }
492#else
493 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
494 if (pvRet)
495 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
496#endif
497 return NULL;
498
499}
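/* Note: iemExecMemAllocatorAllocInChunkInt is tried twice above - first from
   the 64-aligned free hint to the end of the chunk, then from the start of the
   chunk up to just past the hint - so a stale idxFreeHint merely costs a second
   scan and never causes free space to be missed. */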
500
501
502/**
503 * Allocates @a cbReq bytes of executable memory.
504 *
505 * @returns Pointer to the memory, NULL if out of memory or other problem
506 * encountered.
507 * @param pVCpu The cross context virtual CPU structure of the calling
508 * thread.
509 * @param cbReq How many bytes are required.
510 */
511static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
512{
513 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
514 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
515 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
516
517 /*
518 * Adjust the request size so it'll fit the allocator alignment/whatnot.
519 *
520 * For the RTHeapSimple allocator this means to follow the logic described
521 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
522 * existing chunks if we think we've got sufficient free memory around.
523 *
524 * For the alternative allocator we just align the request up to a whole unit size.
525 */
526#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
527 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
528#else
529 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
530#endif
531 if (cbReq <= pExecMemAllocator->cbFree)
532 {
533 uint32_t const cChunks = pExecMemAllocator->cChunks;
534 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
535 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
536 {
537 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
538 if (pvRet)
539 return pvRet;
540 }
541 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 }
548
549 /*
550 * Can we grow it with another chunk?
551 */
552 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
553 {
554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
555 AssertLogRelRCReturn(rc, NULL);
556
557 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
558 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
559 if (pvRet)
560 return pvRet;
561 AssertFailed();
562 }
563
564 /* What now? Prune native translation blocks from the cache? */
565 AssertFailed();
566 return NULL;
567}
568
569
570/** This is a hook that we may need later for changing memory protection back
571 * to readonly+exec */
572static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
573{
574#ifdef RT_OS_DARWIN
575 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
576 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
577 AssertRC(rc); RT_NOREF(pVCpu);
578
579 /*
580 * Flush the instruction cache:
581 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
582 */
583 /* sys_dcache_flush(pv, cb); - not necessary */
584 sys_icache_invalidate(pv, cb);
585#else
586 RT_NOREF(pVCpu, pv, cb);
587#endif
588}
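/*
 * Typical usage sketch (illustrative only; pbCode and cbCode are made-up names,
 * the real caller is the native recompiler further down in this file):
 *
 *     uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *     if (pbCode)
 *     {
 *         // ... emit the native code into pbCode (mapped RW on darwin) ...
 *         iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbCode); // flip to R+X, flush icache
 *         // ... execute the translation block ...
 *         iemExecMemAllocatorFree(pVCpu, pbCode, cbCode);        // when the TB is retired
 *     }
 */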
589
590
591/**
592 * Frees executable memory.
593 */
594void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
595{
596 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
597 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
598 Assert(pv);
599#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
600 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
601#else
602 Assert(!((uintptr_t)pv & 63));
603#endif
604
605 /* Align the size as we did when allocating the block. */
606#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
607 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
608#else
609 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
610#endif
611
612 /* Free it / assert sanity. */
613#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
614 uint32_t const cChunks = pExecMemAllocator->cChunks;
615 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
616 bool fFound = false;
617 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
618 {
619 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
620 fFound = offChunk < cbChunk;
621 if (fFound)
622 {
623#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
624 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
625 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
626
627 /* Check that it's valid and free it. */
628 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
629 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
630 for (uint32_t i = 1; i < cReqUnits; i++)
631 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
632 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
633
634 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
635 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
636
637 /* Update the stats. */
638 pExecMemAllocator->cbAllocated -= cb;
639 pExecMemAllocator->cbFree += cb;
640 pExecMemAllocator->cAllocations -= 1;
641 return;
642#else
643 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
644 break;
645#endif
646 }
647 }
648# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
649 AssertFailed();
650# else
651 Assert(fFound);
652# endif
653#endif
654
655#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
656 /* Update the stats while cb is freshly calculated. */
657 pExecMemAllocator->cbAllocated -= cb;
658 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
659 pExecMemAllocator->cAllocations -= 1;
660
661 /* Free it. */
662 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
663#endif
664}
665
666
667
668#ifdef IN_RING3
669# ifdef RT_OS_WINDOWS
670
671/**
672 * Initializes the unwind info structures for windows hosts.
673 */
674static int
675iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
676 void *pvChunk, uint32_t idxChunk)
677{
678 RT_NOREF(pVCpu);
679
680 /*
681 * The AMD64 unwind opcodes.
682 *
683 * This is a program that starts with RSP after a RET instruction that
684 * ends up in recompiled code, and the operations we describe here will
685 * restore all non-volatile registers and bring RSP back to where our
686 * RET address is. This means it's reverse order from what happens in
687 * the prologue.
688 *
689 * Note! Using a frame register approach here both because we have one
690 * and mainly because the UWOP_ALLOC_LARGE argument values
691 * would be a pain to write initializers for. On the positive
692 * side, we're impervious to changes in the stack variable
693 * area and can deal with dynamic stack allocations if necessary.
694 */
695 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
696 {
697 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
698 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
699 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
700 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
701 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
702 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
703 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
704 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
705 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
706 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
707 };
708 union
709 {
710 IMAGE_UNWIND_INFO Info;
711 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
712 } s_UnwindInfo =
713 {
714 {
715 /* .Version = */ 1,
716 /* .Flags = */ 0,
717 /* .SizeOfProlog = */ 16, /* whatever */
718 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
719 /* .FrameRegister = */ X86_GREG_xBP,
720 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
721 }
722 };
723 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
724 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
725
726 /*
727 * Calc how much space we need and allocate it off the exec heap.
728 */
729 unsigned const cFunctionEntries = 1;
730 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
731 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
732# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
733 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
734 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
735 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
736# else
737 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
738 - pExecMemAllocator->cbHeapBlockHdr;
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbNeededAligned, 32 /*cbAlignment*/);
741# endif
742 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
743 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
744
745 /*
746 * Initialize the structures.
747 */
748 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
749
750 paFunctions[0].BeginAddress = 0;
751 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
752 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
753
754 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
755 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
756
757 /*
758 * Register it.
759 */
760 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
761 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
762
763 return VINF_SUCCESS;
764}
765
766
767# else /* !RT_OS_WINDOWS */
768
769/**
770 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
771 */
772DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
773{
774 if (iValue >= 64)
775 {
776 Assert(iValue < 0x2000);
777 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
778 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
779 }
780 else if (iValue >= 0)
781 *Ptr.pb++ = (uint8_t)iValue;
782 else if (iValue > -64)
783 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
784 else
785 {
786 Assert(iValue > -0x2000);
787 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
788 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
789 }
790 return Ptr;
791}
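/* Example: iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78
   ((-8 & 0x3f) | 0x40), while iemDwarfPutLeb128(Ptr, 100) needs two
   bytes: 0xe4, 0x00. */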
792
793
794/**
795 * Emits an ULEB128 encoded value (up to 64-bit wide).
796 */
797DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
798{
799 while (uValue >= 0x80)
800 {
801 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
802 uValue >>= 7;
803 }
804 *Ptr.pb++ = (uint8_t)uValue;
805 return Ptr;
806}
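/* Example: iemDwarfPutUleb128(Ptr, 300) emits 0xac, 0x02 (300 = 0x2c + 2*128);
   anything below 0x80 is a single byte. */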
807
808
809/**
810 * Emits a CFA rule as register @a uReg + offset @a off.
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
813{
814 *Ptr.pb++ = DW_CFA_def_cfa;
815 Ptr = iemDwarfPutUleb128(Ptr, uReg);
816 Ptr = iemDwarfPutUleb128(Ptr, off);
817 return Ptr;
818}
819
820
821/**
822 * Emits a register (@a uReg) save location:
823 * CFA + @a off * data_alignment_factor
824 */
825DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
826{
827 if (uReg < 0x40)
828 *Ptr.pb++ = DW_CFA_offset | uReg;
829 else
830 {
831 *Ptr.pb++ = DW_CFA_offset_extended;
832 Ptr = iemDwarfPutUleb128(Ptr, uReg);
833 }
834 Ptr = iemDwarfPutUleb128(Ptr, off);
835 return Ptr;
836}
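/* With the data alignment factor of -8 established in the CIE below, e.g.
   iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) says the caller's RBP was
   saved at CFA + 2 * -8 = CFA - 16, matching the annotations on the initial
   instructions further down. */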
837
838
839# if 0 /* unused */
840/**
841 * Emits a register (@a uReg) save location, using signed offset:
842 * CFA + @a offSigned * data_alignment_factor
843 */
844DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
845{
846 *Ptr.pb++ = DW_CFA_offset_extended_sf;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
849 return Ptr;
850}
851# endif
852
853
854/**
855 * Initializes the unwind info section for non-windows hosts.
856 */
857static int
858iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
859 void *pvChunk, uint32_t idxChunk)
860{
861 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
862 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
863
864 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
865
866 /*
867 * Generate the CIE first.
868 */
869# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
870 uint8_t const iDwarfVer = 3;
871# else
872 uint8_t const iDwarfVer = 4;
873# endif
874 RTPTRUNION const PtrCie = Ptr;
875 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
876 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
877 *Ptr.pb++ = iDwarfVer; /* DWARF version */
878 *Ptr.pb++ = 0; /* Augmentation. */
879 if (iDwarfVer >= 4)
880 {
881 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
882 *Ptr.pb++ = 0; /* Segment selector size. */
883 }
884# ifdef RT_ARCH_AMD64
885 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
886# else
887 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
888# endif
889 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
890# ifdef RT_ARCH_AMD64
891 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
892# elif defined(RT_ARCH_ARM64)
893 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
894# else
895# error "port me"
896# endif
897 /* Initial instructions: */
898# ifdef RT_ARCH_AMD64
899 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
907# elif defined(RT_ARCH_ARM64)
908# if 1
909 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
910# else
911 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
912# endif
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
925 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
926 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
927# else
928# error "port me"
929# endif
930 while ((Ptr.u - PtrCie.u) & 3)
931 *Ptr.pb++ = DW_CFA_nop;
932 /* Finalize the CIE size. */
933 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
934
935 /*
936 * Generate an FDE for the whole chunk area.
937 */
938# ifdef IEMNATIVE_USE_LIBUNWIND
939 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
940# endif
941 RTPTRUNION const PtrFde = Ptr;
942 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
943 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
944 Ptr.pu32++;
945 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
946 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
947# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
948 *Ptr.pb++ = DW_CFA_nop;
949# endif
950 while ((Ptr.u - PtrFde.u) & 3)
951 *Ptr.pb++ = DW_CFA_nop;
952 /* Finalize the FDE size. */
953 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
954
955 /* Terminator entry. */
956 *Ptr.pu32++ = 0;
957 *Ptr.pu32++ = 0; /* just to be sure... */
958 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
959
960 /*
961 * Register it.
962 */
963# ifdef IEMNATIVE_USE_LIBUNWIND
964 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
965# else
966 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
967 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
968# endif
969
970# ifdef IEMNATIVE_USE_GDB_JIT
971 /*
972 * Now for telling GDB about this (experimental).
973 *
974 * This seems to work best with ET_DYN.
975 */
976 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
977# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
978 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
979 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
980# else
981 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
982 - pExecMemAllocator->cbHeapBlockHdr;
983 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbNeededAligned, 32 /*cbAlignment*/);
984# endif
985 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
986 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
987
988 RT_ZERO(*pSymFile);
989
990 /*
991 * The ELF header:
992 */
993 pSymFile->EHdr.e_ident[0] = ELFMAG0;
994 pSymFile->EHdr.e_ident[1] = ELFMAG1;
995 pSymFile->EHdr.e_ident[2] = ELFMAG2;
996 pSymFile->EHdr.e_ident[3] = ELFMAG3;
997 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
998 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
999 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1000 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1001# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1002 pSymFile->EHdr.e_type = ET_DYN;
1003# else
1004 pSymFile->EHdr.e_type = ET_REL;
1005# endif
1006# ifdef RT_ARCH_AMD64
1007 pSymFile->EHdr.e_machine = EM_AMD64;
1008# elif defined(RT_ARCH_ARM64)
1009 pSymFile->EHdr.e_machine = EM_AARCH64;
1010# else
1011# error "port me"
1012# endif
1013 pSymFile->EHdr.e_version = 1; /*?*/
1014 pSymFile->EHdr.e_entry = 0;
1015# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1016 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1017# else
1018 pSymFile->EHdr.e_phoff = 0;
1019# endif
1020 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1021 pSymFile->EHdr.e_flags = 0;
1022 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1023# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1024 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1025 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1026# else
1027 pSymFile->EHdr.e_phentsize = 0;
1028 pSymFile->EHdr.e_phnum = 0;
1029# endif
1030 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1031 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1032 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1033
1034 uint32_t offStrTab = 0;
1035#define APPEND_STR(a_szStr) do { \
1036 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1037 offStrTab += sizeof(a_szStr); \
1038 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1039 } while (0)
1040#define APPEND_STR_FMT(a_szStr, ...) do { \
1041 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1042 offStrTab++; \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
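/* The two macros above build a classic ELF string table: szzStrTab starts with
   an empty string at offset 0, each name is appended at the current offStrTab
   (which the caller has just recorded in sh_name/st_name/d_un) and offStrTab is
   advanced past the terminator, e.g. "\0.eh_frame\0.shstrtab\0.symtab\0..." */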
1045
1046 /*
1047 * Section headers.
1048 */
1049 /* Section header #0: NULL */
1050 unsigned i = 0;
1051 APPEND_STR("");
1052 RT_ZERO(pSymFile->aShdrs[i]);
1053 i++;
1054
1055 /* Section header: .eh_frame */
1056 pSymFile->aShdrs[i].sh_name = offStrTab;
1057 APPEND_STR(".eh_frame");
1058 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1059 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1060# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1061 pSymFile->aShdrs[i].sh_offset
1062 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1063# else
1064 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1065 pSymFile->aShdrs[i].sh_offset = 0;
1066# endif
1067
1068 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1069 pSymFile->aShdrs[i].sh_link = 0;
1070 pSymFile->aShdrs[i].sh_info = 0;
1071 pSymFile->aShdrs[i].sh_addralign = 1;
1072 pSymFile->aShdrs[i].sh_entsize = 0;
1073 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1074 i++;
1075
1076 /* Section header: .shstrtab */
1077 unsigned const iShStrTab = i;
1078 pSymFile->EHdr.e_shstrndx = iShStrTab;
1079 pSymFile->aShdrs[i].sh_name = offStrTab;
1080 APPEND_STR(".shstrtab");
1081 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1082 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1083# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1084 pSymFile->aShdrs[i].sh_offset
1085 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1086# else
1087 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1088 pSymFile->aShdrs[i].sh_offset = 0;
1089# endif
1090 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1091 pSymFile->aShdrs[i].sh_link = 0;
1092 pSymFile->aShdrs[i].sh_info = 0;
1093 pSymFile->aShdrs[i].sh_addralign = 1;
1094 pSymFile->aShdrs[i].sh_entsize = 0;
1095 i++;
1096
1097 /* Section header: .symtab */
1098 pSymFile->aShdrs[i].sh_name = offStrTab;
1099 APPEND_STR(".symtab");
1100 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1101 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1102 pSymFile->aShdrs[i].sh_offset
1103 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1104 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1105 pSymFile->aShdrs[i].sh_link = iShStrTab;
1106 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1107 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1108 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1109 i++;
1110
1111# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1112 /* Section header: .dynsym */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".dynsym");
1115 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1124 i++;
1125# endif
1126
1127# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1128 /* Section header: .dynamic */
1129 pSymFile->aShdrs[i].sh_name = offStrTab;
1130 APPEND_STR(".dynamic");
1131 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1132 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1133 pSymFile->aShdrs[i].sh_offset
1134 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1135 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1136 pSymFile->aShdrs[i].sh_link = iShStrTab;
1137 pSymFile->aShdrs[i].sh_info = 0;
1138 pSymFile->aShdrs[i].sh_addralign = 1;
1139 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1140 i++;
1141# endif
1142
1143 /* Section header: .text */
1144 unsigned const iShText = i;
1145 pSymFile->aShdrs[i].sh_name = offStrTab;
1146 APPEND_STR(".text");
1147 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1148 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1149# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1150 pSymFile->aShdrs[i].sh_offset
1151 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1152# else
1153 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1154 pSymFile->aShdrs[i].sh_offset = 0;
1155# endif
1156 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1157 pSymFile->aShdrs[i].sh_link = 0;
1158 pSymFile->aShdrs[i].sh_info = 0;
1159 pSymFile->aShdrs[i].sh_addralign = 1;
1160 pSymFile->aShdrs[i].sh_entsize = 0;
1161 i++;
1162
1163 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1164
1165# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1166 /*
1167 * The program headers:
1168 */
1169 /* Everything in a single LOAD segment: */
1170 i = 0;
1171 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1172 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1173 pSymFile->aPhdrs[i].p_offset
1174 = pSymFile->aPhdrs[i].p_vaddr
1175 = pSymFile->aPhdrs[i].p_paddr = 0;
1176 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1177 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1178 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1179 i++;
1180 /* The .dynamic segment. */
1181 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1182 pSymFile->aPhdrs[i].p_flags = PF_R;
1183 pSymFile->aPhdrs[i].p_offset
1184 = pSymFile->aPhdrs[i].p_vaddr
1185 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1186 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1187 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1188 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1189 i++;
1190
1191 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1192
1193 /*
1194 * The dynamic section:
1195 */
1196 i = 0;
1197 pSymFile->aDyn[i].d_tag = DT_SONAME;
1198 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1199 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1200 i++;
1201 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1202 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1205 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1206 i++;
1207 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1208 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1211 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_NULL;
1214 i++;
1215 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1216# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1217
1218 /*
1219 * Symbol tables:
1220 */
1221 /** @todo gdb doesn't seem to really like this ... */
1222 i = 0;
1223 pSymFile->aSymbols[i].st_name = 0;
1224 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1225 pSymFile->aSymbols[i].st_value = 0;
1226 pSymFile->aSymbols[i].st_size = 0;
1227 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1228 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1229# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1230 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1231# endif
1232 i++;
1233
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240 i++;
1241
1242 pSymFile->aSymbols[i].st_name = offStrTab;
1243 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1244# if 0
1245 pSymFile->aSymbols[i].st_shndx = iShText;
1246 pSymFile->aSymbols[i].st_value = 0;
1247# else
1248 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1249 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1250# endif
1251 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1252 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1253 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1254# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1255 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1256 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1257# endif
1258 i++;
1259
1260 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1261 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1262
1263 /*
1264 * The GDB JIT entry and informing GDB.
1265 */
1266 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1267# if 1
1268 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1269# else
1270 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1271# endif
1272
1273 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1274 RTCritSectEnter(&g_IemNativeGdbJitLock);
1275 pEhFrame->GdbJitEntry.pNext = NULL;
1276 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1277 if (__jit_debug_descriptor.pTail)
1278 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1279 else
1280 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1281 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1282 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1283
1284 /* Notify GDB: */
1285 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1286 __jit_debug_register_code();
1287 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1288 RTCritSectLeave(&g_IemNativeGdbJitLock);
1289
1290# else /* !IEMNATIVE_USE_GDB_JIT */
1291 RT_NOREF(pVCpu);
1292# endif /* !IEMNATIVE_USE_GDB_JIT */
1293
1294 return VINF_SUCCESS;
1295}
1296
1297# endif /* !RT_OS_WINDOWS */
1298#endif /* IN_RING3 */
1299
1300
1301/**
1302 * Adds another chunk to the executable memory allocator.
1303 *
1304 * This is used by the init code for the initial allocation and later by the
1305 * regular allocator function when it's out of memory.
1306 */
1307static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1308{
1309 /* Check that we've room for growth. */
1310 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1311 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1312
1313 /* Allocate a chunk. */
1314#ifdef RT_OS_DARWIN
1315 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1316#else
1317 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1318#endif
1319 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1320
1321#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1322 int rc = VINF_SUCCESS;
1323#else
1324 /* Initialize the heap for the chunk. */
1325 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1326 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1327 AssertRC(rc);
1328 if (RT_SUCCESS(rc))
1329 {
1330 /*
1331 * We want the memory to be aligned on a 64 byte boundary, so the first time through
1332 * here we do some exploratory allocations to see how we can achieve this.
1333 * On subsequent runs we only make an initial adjustment allocation, if
1334 * necessary.
1335 *
1336 * Since we own the heap implementation, we know that the internal block
1337 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1338 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1339 * to the size, align up by 64 bytes, and subtract 32 bytes.
1340 *
1341 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1342 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1343 * allocation to force subsequent allocations to return 64 byte aligned
1344 * user areas.
1345 */
1346 if (!pExecMemAllocator->cbHeapBlockHdr)
1347 {
1348 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1349 pExecMemAllocator->cbHeapAlignTweak = 64;
1350 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1351 32 /*cbAlignment*/);
1352 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1353
1354 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1355 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1356 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1357 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1358 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1359
1360 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1361 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1362 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1363 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1364 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1365
1366 RTHeapSimpleFree(hHeap, pvTest2);
1367 RTHeapSimpleFree(hHeap, pvTest1);
1368 }
1369 else
1370 {
1371 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1372 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1373 }
1374 if (RT_SUCCESS(rc))
1375#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1376 {
1377 /*
1378 * Add the chunk.
1379 *
1380 * This must be done before the unwind init so windows can allocate
1381 * memory from the chunk when using the alternative sub-allocator.
1382 */
1383 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1384#ifdef IN_RING3
1385 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1386#endif
1387#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1388 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1389#else
1390 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1391 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1392 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1393 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1394#endif
1395
1396 pExecMemAllocator->cChunks = idxChunk + 1;
1397 pExecMemAllocator->idxChunkHint = idxChunk;
1398
1399#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1400 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1401 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1402#else
1403 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1404 pExecMemAllocator->cbTotal += cbFree;
1405 pExecMemAllocator->cbFree += cbFree;
1406#endif
1407
1408#ifdef IN_RING3
1409 /*
1410 * Initialize the unwind information (this cannot really fail atm).
1411 * (This sets pvUnwindInfo.)
1412 */
1413 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1414 if (RT_SUCCESS(rc))
1415#endif
1416 {
1417 return VINF_SUCCESS;
1418 }
1419
1420#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1421 /* Just in case the impossible happens, undo the above: */
1422 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1423 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1424 pExecMemAllocator->cChunks = idxChunk;
1425 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1426 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1427 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1428 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1429#endif
1430 }
1431#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 }
1433#endif
1434 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1435 RT_NOREF(pVCpu);
1436 return rc;
1437}
1438
1439
1440/**
1441 * Initializes the executable memory allocator for native recompilation on the
1442 * calling EMT.
1443 *
1444 * @returns VBox status code.
1445 * @param pVCpu The cross context virtual CPU structure of the calling
1446 * thread.
1447 * @param cbMax The max size of the allocator.
1448 * @param cbInitial The initial allocator size.
1449 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1450 * dependent).
1451 */
1452int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1453{
1454 /*
1455 * Validate input.
1456 */
1457 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1458 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1459 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1460 || cbChunk == 0
1461 || ( RT_IS_POWER_OF_TWO(cbChunk)
1462 && cbChunk >= _1M
1463 && cbChunk <= _256M
1464 && cbChunk <= cbMax),
1465 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1466 VERR_OUT_OF_RANGE);
1467
1468 /*
1469 * Adjust/figure out the chunk size.
1470 */
1471 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1472 {
1473 if (cbMax >= _256M)
1474 cbChunk = _64M;
1475 else
1476 {
1477 if (cbMax < _16M)
1478 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1479 else
1480 cbChunk = (uint32_t)cbMax / 4;
1481 if (!RT_IS_POWER_OF_TWO(cbChunk))
1482 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1483 }
1484 }
1485
1486 if (cbChunk > cbMax)
1487 cbMax = cbChunk;
1488 else
1489 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1490 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1491 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
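    /* Example: with cbMax = 40 MiB and cbChunk = 0 the logic above picks
       cbMax / 4 = 10 MiB and rounds it up to a power of two, giving
       cbChunk = 16 MiB; cbMax is then rounded up to a whole number of
       chunks: cbMax = 48 MiB, cMaxChunks = 3. */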
1492
1493 /*
1494 * Allocate and initialize the allocator instance.
1495 */
1496 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1497#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1498 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1499 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1500 cbNeeded += cbBitmap * cMaxChunks;
1501 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1502 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1503#endif
1504#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1505 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1506 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1507#endif
1508 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1509 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1510 VERR_NO_MEMORY);
1511 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1512 pExecMemAllocator->cbChunk = cbChunk;
1513 pExecMemAllocator->cMaxChunks = cMaxChunks;
1514 pExecMemAllocator->cChunks = 0;
1515 pExecMemAllocator->idxChunkHint = 0;
1516 pExecMemAllocator->cAllocations = 0;
1517 pExecMemAllocator->cbTotal = 0;
1518 pExecMemAllocator->cbFree = 0;
1519 pExecMemAllocator->cbAllocated = 0;
1520#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1521 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1522 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1523 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1524 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1525#endif
1526#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1527 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1528#endif
1529 for (uint32_t i = 0; i < cMaxChunks; i++)
1530 {
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1533 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1534#else
1535 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1536#endif
1537 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1538#ifdef IN_RING0
1539 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1540#else
1541 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1542#endif
1543 }
1544 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1545
1546 /*
1547 * Do the initial allocations.
1548 */
1549 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1550 {
1551 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1552 AssertLogRelRCReturn(rc, rc);
1553 }
1554
1555 pExecMemAllocator->idxChunkHint = 0;
1556
1557 return VINF_SUCCESS;
1558}
1559
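/*
 * A minimal usage sketch for iemExecMemAllocatorInit(); the sizes are purely
 * illustrative and not values prescribed by this file:
 * @code
 *     // Up to 64 MiB of executable memory, 16 MiB committed up front,
 *     // default chunk size.
 *     int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);
 *     AssertLogRelRCReturn(rc, rc);
 * @endcode
 */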
1560
1561/*********************************************************************************************************************************
1562* Native Recompilation *
1563*********************************************************************************************************************************/
1564
1565
1566/**
1567 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1568 */
1569IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1570{
1571 pVCpu->iem.s.cInstructions += idxInstr;
1572 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1573}
1574
1575
1576/**
1577 * Used by TB code when it wants to raise a \#GP(0).
1578 */
1579IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1580{
1581 pVCpu->iem.s.cInstructions += idxInstr;
1582 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1583#ifndef _MSC_VER
1584 return VINF_IEM_RAISED_XCPT; /* not reached */
1585#endif
1586}
1587
1588
1589/**
1590 * Reinitializes the native recompiler state.
1591 *
1592 * Called before starting a new recompile job.
1593 */
1594static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1595{
1596 pReNative->cLabels = 0;
1597 pReNative->bmLabelTypes = 0;
1598 pReNative->cFixups = 0;
1599#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1600 pReNative->pDbgInfo->cEntries = 0;
1601#endif
1602 pReNative->pTbOrg = pTb;
1603 pReNative->cCondDepth = 0;
1604 pReNative->uCondSeqNo = 0;
1605 pReNative->uCheckIrqSeqNo = 0;
1606
1607 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1608#if IEMNATIVE_HST_GREG_COUNT < 32
1609 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1610#endif
1611 ;
1612 pReNative->Core.bmHstRegsWithGstShadow = 0;
1613 pReNative->Core.bmGstRegShadows = 0;
1614 pReNative->Core.bmVars = 0;
1615 pReNative->Core.bmStack = 0;
1616 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1617 pReNative->Core.u64ArgVars = UINT64_MAX;
1618
1619 /* Full host register reinit: */
1620 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1621 {
1622 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1623 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1624 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1625 }
1626
1627 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1628 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1629#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1630 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1631#endif
1632#ifdef IEMNATIVE_REG_FIXED_TMP0
1633 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1634#endif
1635 );
1636 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1637 {
1638 fRegs &= ~RT_BIT_32(idxReg);
1639 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1640 }
1641
1642 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1643#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1644 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1645#endif
1646#ifdef IEMNATIVE_REG_FIXED_TMP0
1647 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1648#endif
1649 return pReNative;
1650}
1651
1652
1653/**
1654 * Allocates and initializes the native recompiler state.
1655 *
1656 * This is called the first time an EMT wants to recompile something.
1657 *
1658 * @returns Pointer to the new recompiler state.
1659 * @param pVCpu The cross context virtual CPU structure of the calling
1660 * thread.
1661 * @param pTb The TB that's about to be recompiled.
1662 * @thread EMT(pVCpu)
1663 */
1664static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1665{
1666 VMCPU_ASSERT_EMT(pVCpu);
1667
1668 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1669 AssertReturn(pReNative, NULL);
1670
1671 /*
1672 * Try allocate all the buffers and stuff we need.
1673 */
1674 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1675 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1676 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1677#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1678 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1679#endif
1680 if (RT_LIKELY( pReNative->pInstrBuf
1681 && pReNative->paLabels
1682 && pReNative->paFixups)
1683#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1684 && pReNative->pDbgInfo
1685#endif
1686 )
1687 {
1688 /*
1689 * Set the buffer & array sizes on success.
1690 */
1691 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1692 pReNative->cLabelsAlloc = _8K;
1693 pReNative->cFixupsAlloc = _16K;
1694#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1695 pReNative->cDbgInfoAlloc = _16K;
1696#endif
1697
1698 /*
1699 * Done, just need to save it and reinit it.
1700 */
1701 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1702 return iemNativeReInit(pReNative, pTb);
1703 }
1704
1705 /*
1706 * Failed. Cleanup and return.
1707 */
1708 AssertFailed();
1709 RTMemFree(pReNative->pInstrBuf);
1710 RTMemFree(pReNative->paLabels);
1711 RTMemFree(pReNative->paFixups);
1712#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1713 RTMemFree(pReNative->pDbgInfo);
1714#endif
1715 RTMemFree(pReNative);
1716 return NULL;
1717}
1718
1719
1720/**
1721 * Creates a label
1722 *
1723 * If the label does not yet have a defined position,
1724 * call iemNativeLabelDefine() later to set it.
1725 *
1726 * @returns Label ID. Throws VBox status code on failure, so no need to check
1727 * the return value.
1728 * @param pReNative The native recompile state.
1729 * @param enmType The label type.
1730 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1731 * label is not yet defined (default).
1732 * @param uData Data associated with the label. Only applicable to
1733 * certain types of labels. Default is zero.
1734 */
1735DECL_HIDDEN_THROW(uint32_t)
1736iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1737 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1738{
1739 /*
1740 * Locate existing label definition.
1741 *
1742 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1743 * and uData is zero.
1744 */
1745 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1746 uint32_t const cLabels = pReNative->cLabels;
1747 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1748#ifndef VBOX_STRICT
1749 && offWhere == UINT32_MAX
1750 && uData == 0
1751#endif
1752 )
1753 {
1754 /** @todo Since this is only used for labels with uData = 0, just use a
1755 * lookup array? */
1756 for (uint32_t i = 0; i < cLabels; i++)
1757 if ( paLabels[i].enmType == enmType
1758 && paLabels[i].uData == uData)
1759 {
1760#ifdef VBOX_STRICT
1761 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1762 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1763#endif
1764 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1765 return i;
1766 }
1767 }
1768
1769 /*
1770 * Make sure we've got room for another label.
1771 */
1772 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1773 { /* likely */ }
1774 else
1775 {
1776 uint32_t cNew = pReNative->cLabelsAlloc;
1777 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1778 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1779 cNew *= 2;
1780 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1781 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1782 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1783 pReNative->paLabels = paLabels;
1784 pReNative->cLabelsAlloc = cNew;
1785 }
1786
1787 /*
1788 * Define a new label.
1789 */
1790 paLabels[cLabels].off = offWhere;
1791 paLabels[cLabels].enmType = enmType;
1792 paLabels[cLabels].uData = uData;
1793 pReNative->cLabels = cLabels + 1;
1794
1795 Assert((unsigned)enmType < 64);
1796 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1797
1798 if (offWhere != UINT32_MAX)
1799 {
1800#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1801 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1802 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1803#endif
1804 }
1805 return cLabels;
1806}
1807
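/*
 * A minimal sketch of the forward-declaration pattern served by
 * iemNativeLabelCreate() and iemNativeLabelDefine(); enmSomeLabelType stands
 * in for a concrete IEMNATIVELABELTYPE value and is not defined here:
 * @code
 *     // Get a label now, bind it to a code position later.
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *     // ... emit code that branches to the label ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */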
1808
1809/**
1810 * Defines the location of an existing label.
1811 *
1812 * @param pReNative The native recompile state.
1813 * @param idxLabel The label to define.
1814 * @param offWhere The position.
1815 */
1816DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1817{
1818 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1819 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1820 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1821 pLabel->off = offWhere;
1822#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1823 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1824 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1825#endif
1826}
1827
1828
1829/**
1830 * Looks up a label.
1831 *
1832 * @returns Label ID if found, UINT32_MAX if not.
1833 */
1834static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1835 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1836{
1837 Assert((unsigned)enmType < 64);
1838 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1839 {
1840 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1841 uint32_t const cLabels = pReNative->cLabels;
1842 for (uint32_t i = 0; i < cLabels; i++)
1843 if ( paLabels[i].enmType == enmType
1844 && paLabels[i].uData == uData
1845 && ( paLabels[i].off == offWhere
1846 || offWhere == UINT32_MAX
1847 || paLabels[i].off == UINT32_MAX))
1848 return i;
1849 }
1850 return UINT32_MAX;
1851}
1852
1853
1854/**
1855 * Adds a fixup.
1856 *
1857 * @throws VBox status code (int) on failure.
1858 * @param pReNative The native recompile state.
1859 * @param offWhere The instruction offset of the fixup location.
1860 * @param idxLabel The target label ID for the fixup.
1861 * @param enmType The fixup type.
1862 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1863 */
1864DECL_HIDDEN_THROW(void)
1865iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1866 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1867{
1868 Assert(idxLabel <= UINT16_MAX);
1869 Assert((unsigned)enmType <= UINT8_MAX);
1870
1871 /*
1872 * Make sure we've room.
1873 */
1874 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1875 uint32_t const cFixups = pReNative->cFixups;
1876 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1877 { /* likely */ }
1878 else
1879 {
1880 uint32_t cNew = pReNative->cFixupsAlloc;
1881 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1882 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1883 cNew *= 2;
1884 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1885 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1886 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1887 pReNative->paFixups = paFixups;
1888 pReNative->cFixupsAlloc = cNew;
1889 }
1890
1891 /*
1892 * Add the fixup.
1893 */
1894 paFixups[cFixups].off = offWhere;
1895 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1896 paFixups[cFixups].enmType = enmType;
1897 paFixups[cFixups].offAddend = offAddend;
1898 pReNative->cFixups = cFixups + 1;
1899}
1900
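/*
 * Sketch of how a fixup typically pairs with a label; the fixup type name is
 * an assumption about the enum declared in the header, not something defined
 * in this file:
 * @code
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *     // offFixup is where the branch to patch was emitted; once the label is
 *     // defined, the fixup pass rewrites that instruction to target it.
 *     iemNativeAddFixup(pReNative, offFixup, idxLabel, kIemNativeFixupType_Rel32);
 * @endcode
 */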
1901
1902/**
1903 * Slow code path for iemNativeInstrBufEnsure.
1904 */
1905DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1906{
1907 /* Double the buffer size till we meet the request. */
1908 uint32_t cNew = pReNative->cInstrBufAlloc;
1909 AssertReturn(cNew > 0, NULL);
1910 do
1911 cNew *= 2;
1912 while (cNew < off + cInstrReq);
1913
1914 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1915#ifdef RT_ARCH_ARM64
1916 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1917#else
1918 uint32_t const cbMaxInstrBuf = _2M;
1919#endif
1920 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1921
1922 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1923 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1924
1925 pReNative->cInstrBufAlloc = cNew;
1926 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1927}
1928
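/*
 * For illustration of the doubling in iemNativeInstrBufEnsureSlow() above:
 * with cInstrBufAlloc at 16384 entries and off + cInstrReq adding up to 17000,
 * a single doubling to 32768 entries satisfies the request; cbNew is then
 * checked against the per-architecture cap before the buffer is reallocated.
 */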
1929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1930
1931/**
1932 * Grows the static debug info array used during recompilation.
1933 *
1934 * @returns Pointer to the new debug info block; throws VBox status code on
1935 * failure, so no need to check the return value.
1936 */
1937DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1938{
1939 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1940 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1941 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1942 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1943 pReNative->pDbgInfo = pDbgInfo;
1944 pReNative->cDbgInfoAlloc = cNew;
1945 return pDbgInfo;
1946}
1947
1948
1949/**
1950 * Adds a new debug info uninitialized entry, returning the pointer to it.
1951 */
1952DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1953{
1954 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1955 { /* likely */ }
1956 else
1957 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1958 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1959}
1960
1961
1962/**
1963 * Debug Info: Adds a native offset record, if necessary.
1964 */
1965static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1966{
1967 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1968
1969 /*
1970 * Search backwards to see if we've got a similar record already.
1971 */
1972 uint32_t idx = pDbgInfo->cEntries;
1973 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1974 while (idx-- > idxStop)
1975 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1976 {
1977 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1978 return;
1979 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1980 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1981 break;
1982 }
1983
1984 /*
1985 * Add it.
1986 */
1987 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1988 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1989 pEntry->NativeOffset.offNative = off;
1990}
1991
1992
1993/**
1994 * Debug Info: Record info about a label.
1995 */
1996static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
1997{
1998 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1999 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2000 pEntry->Label.uUnused = 0;
2001 pEntry->Label.enmLabel = (uint8_t)enmType;
2002 pEntry->Label.uData = uData;
2003}
2004
2005
2006/**
2007 * Debug Info: Record info about a threaded call.
2008 */
2009static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2010{
2011 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2012 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2013 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2014 pEntry->ThreadedCall.uUnused = 0;
2015 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2016}
2017
2018
2019/**
2020 * Debug Info: Record info about a new guest instruction.
2021 */
2022static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2023{
2024 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2025 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2026 pEntry->GuestInstruction.uUnused = 0;
2027 pEntry->GuestInstruction.fExec = fExec;
2028}
2029
2030
2031/**
2032 * Debug Info: Record info about guest register shadowing.
2033 */
2034static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2035 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2036{
2037 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2038 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2039 pEntry->GuestRegShadowing.uUnused = 0;
2040 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2041 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2042 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2043}
2044
2045#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2046
2047
2048/*********************************************************************************************************************************
2049* Register Allocator *
2050*********************************************************************************************************************************/
2051
2052/**
2053 * Register parameter indexes (indexed by argument number).
2054 */
2055DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2056{
2057 IEMNATIVE_CALL_ARG0_GREG,
2058 IEMNATIVE_CALL_ARG1_GREG,
2059 IEMNATIVE_CALL_ARG2_GREG,
2060 IEMNATIVE_CALL_ARG3_GREG,
2061#if defined(IEMNATIVE_CALL_ARG4_GREG)
2062 IEMNATIVE_CALL_ARG4_GREG,
2063# if defined(IEMNATIVE_CALL_ARG5_GREG)
2064 IEMNATIVE_CALL_ARG5_GREG,
2065# if defined(IEMNATIVE_CALL_ARG6_GREG)
2066 IEMNATIVE_CALL_ARG6_GREG,
2067# if defined(IEMNATIVE_CALL_ARG7_GREG)
2068 IEMNATIVE_CALL_ARG7_GREG,
2069# endif
2070# endif
2071# endif
2072#endif
2073};
2074
2075/**
2076 * Call register masks indexed by argument count.
2077 */
2078DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2079{
2080 0,
2081 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2082 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2083 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2084 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2085 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2086#if defined(IEMNATIVE_CALL_ARG4_GREG)
2087 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2088 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2089# if defined(IEMNATIVE_CALL_ARG5_GREG)
2090 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2091 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2092# if defined(IEMNATIVE_CALL_ARG6_GREG)
2093 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2094 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2095 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2096# if defined(IEMNATIVE_CALL_ARG7_GREG)
2097 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2098 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2099 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2100# endif
2101# endif
2102# endif
2103#endif
2104};
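/*
 * Small sketch of how the two tables above are typically consumed (purely
 * illustrative; cArgs and i are assumed to be in range):
 * @code
 *     // All argument registers of a cArgs-argument call as a single mask ...
 *     uint32_t const fArgRegs  = g_afIemNativeCallRegs[cArgs];
 *     // ... or the individual host register used for argument i.
 *     uint8_t  const idxArgReg = g_aidxIemNativeCallRegs[i];
 * @endcode
 */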
2105
2106#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2107/**
2108 * BP offset of the stack argument slots.
2109 *
2110 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2111 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2112 */
2113DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2114{
2115 IEMNATIVE_FP_OFF_STACK_ARG0,
2116# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2117 IEMNATIVE_FP_OFF_STACK_ARG1,
2118# endif
2119# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2120 IEMNATIVE_FP_OFF_STACK_ARG2,
2121# endif
2122# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2123 IEMNATIVE_FP_OFF_STACK_ARG3,
2124# endif
2125};
2126AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2127#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2128
2129/**
2130 * Info about shadowed guest register values.
2131 * @see IEMNATIVEGSTREG
2132 */
2133static struct
2134{
2135 /** Offset in VMCPU. */
2136 uint32_t off;
2137 /** The field size. */
2138 uint8_t cb;
2139 /** Name (for logging). */
2140 const char *pszName;
2141} const g_aGstShadowInfo[] =
2142{
2143#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2144 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2145 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2146 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2147 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2148 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2151 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2152 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2153 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2154 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2155 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2156 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2157 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2158 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2159 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2160 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2161 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2162 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
2163 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
2164 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
2165 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
2166 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
2167 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
2168 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2169 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2170 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2171 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2172 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2173 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2174 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2175 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2176 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2177 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2178 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2179 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2180 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2181 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2182 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2183 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2184 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2185 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2186#undef CPUMCTX_OFF_AND_SIZE
2187};
2188AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
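/*
 * Lookup sketch for the table above (kIemNativeGstReg_Pc is taken from the
 * index comments; the snippet itself is illustrative):
 * @code
 *     // VMCPU offset and width of the shadowed guest RIP.
 *     uint32_t const offRip = g_aGstShadowInfo[kIemNativeGstReg_Pc].off;
 *     uint8_t  const cbRip  = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;
 * @endcode
 */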
2189
2190
2191/** Host CPU general purpose register names. */
2192const char * const g_apszIemNativeHstRegNames[] =
2193{
2194#ifdef RT_ARCH_AMD64
2195 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2196#elif defined(RT_ARCH_ARM64)
2197 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2198 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2199#else
2200# error "port me"
2201#endif
2202};
2203
2204
2205DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2206 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2207{
2208 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2209
2210 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2211 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2212 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2213 return (uint8_t)idxReg;
2214}
2215
2216
2217/**
2218 * Tries to locate a suitable register in the given register mask.
2219 *
2220 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2221 * failed.
2222 *
2223 * @returns Host register number on success, returns UINT8_MAX on failure.
2224 */
2225static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2226{
2227 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2228 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2229 if (fRegs)
2230 {
2231 /** @todo pick better here: */
2232 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2233
2234 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2235 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2236 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2237 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2238
2239 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2240 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2241 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2242 return idxReg;
2243 }
2244 return UINT8_MAX;
2245}
2246
2247
2248/**
2249 * Locate a register, possibly freeing one up.
2250 *
2251 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2252 * failed.
2253 *
2254 * @returns Host register number on success. Returns UINT8_MAX if no register
2255 * was found, in which case the caller is supposed to deal with this and
2256 * raise an allocation type specific status code (if desired).
2257 *
2258 * @throws VBox status code if we run into trouble spilling a variable or
2259 * recording debug info. Does NOT throw anything if we're out of
2260 * registers, though.
2261 */
2262static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2263 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2264{
2265 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2266 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2267
2268 /*
2269 * Try a freed register that's shadowing a guest register
2270 */
2271 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2272 if (fRegs)
2273 {
2274 unsigned const idxReg = (fPreferVolatile
2275 ? ASMBitFirstSetU32(fRegs)
2276 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2277 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2278 - 1;
2279
2280 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2281 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2282 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2283 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2284
2285 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2286 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2287 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2288 return idxReg;
2289 }
2290
2291 /*
2292 * Try free up a variable that's in a register.
2293 *
2294 * We do two rounds here: first we evacuate variables that don't need to be
2295 * saved on the stack, then in the second round we move things to the stack.
2296 */
2297 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2298 {
2299 uint32_t fVars = pReNative->Core.bmVars;
2300 while (fVars)
2301 {
2302 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2303 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2304 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2305 && (RT_BIT_32(idxReg) & fRegMask)
2306 && ( iLoop == 0
2307 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2308 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2309 {
2310 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2311 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2312 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2313 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2314 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2315
2316 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2317 {
2318 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
2319 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2320 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff,
2321 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2322 - IEMNATIVE_FP_OFF_STACK_VARS,
2323 idxReg);
2324 }
2325
2326 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2327 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2328 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2329 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2330 return idxReg;
2331 }
2332 fVars &= ~RT_BIT_32(idxVar);
2333 }
2334 }
2335
2336 return UINT8_MAX;
2337}
2338
2339
2340/**
2341 * Moves a variable to a different register or spills it onto the stack.
2342 *
2343 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2344 * kinds can easily be recreated if needed later.
2345 *
2346 * @returns The new code buffer position, UINT32_MAX on failure.
2347 * @param pReNative The native recompile state.
2348 * @param off The current code buffer position.
2349 * @param idxVar The variable index.
2350 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2351 * call-volatile registers.
2352 */
2353static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2354 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2355{
2356 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2357 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2358
2359 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2360 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2361 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2362 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2363 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2364 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2365 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2366 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2367
2368
2369 /** @todo Add statistics on this.*/
2370 /** @todo Implement basic variable liveness analysis (python) so variables
2371 * can be freed immediately once no longer used. Without such analysis we
2372 * risk trashing registers and stack slots on dead variables. */
2373
2374 /*
2375 * First try move it to a different register, as that's cheaper.
2376 */
2377 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2378 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2379 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2380 if (fRegs)
2381 {
2382 /* Avoid using shadow registers, if possible. */
2383 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2384 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2385 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2386
2387 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2388 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2389 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2390 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2391 if (fGstRegShadows)
2392 {
2393 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2394 while (fGstRegShadows)
2395 {
2396 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2397 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2398
2399 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2400 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2401 }
2402 }
2403
2404 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2405 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2406 }
2407 /*
2408 * Otherwise we must spill the register onto the stack.
2409 */
2410 else
2411 {
2412 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2413 off = iemNativeEmitStoreGprByBp(pReNative, off,
2414 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2415 - IEMNATIVE_FP_OFF_STACK_VARS,
2416 idxRegOld);
2417
2418 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2419 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2420 }
2421
2422 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2423 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2424 return off;
2425}
2426
2427
2428/**
2429 * Allocates a temporary host general purpose register.
2430 *
2431 * This may emit code to save register content onto the stack in order to free
2432 * up a register.
2433 *
2434 * @returns The host register number; throws VBox status code on failure,
2435 * so no need to check the return value.
2436 * @param pReNative The native recompile state.
2437 * @param poff Pointer to the variable with the code buffer position.
2438 * This will be updated if we need to move a variable from
2439 * register to stack in order to satisfy the request.
2440 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2441 * registers (@c true, default) or the other way around
2442 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2443 */
2444DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2445{
2446 /*
2447 * Try find a completely unused register, preferably a call-volatile one.
2448 */
2449 uint8_t idxReg;
2450 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2451 & ~pReNative->Core.bmHstRegsWithGstShadow
2452 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2453 if (fRegs)
2454 {
2455 if (fPreferVolatile)
2456 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2457 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2458 else
2459 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2460 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2461 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2462 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2463 }
2464 else
2465 {
2466 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2467 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2468 }
2469 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2470}
2471
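/*
 * Minimal usage sketch for iemNativeRegAllocTmp(); what gets emitted with the
 * register is left out:
 * @code
 *     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *     // ... emit code using idxRegTmp, updating off ...
 *     iemNativeRegFreeTmp(pReNative, idxRegTmp);
 * @endcode
 */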
2472
2473/**
2474 * Allocates a temporary register for loading an immediate value into.
2475 *
2476 * This will emit code to load the immediate, unless there happens to be an
2477 * unused register with the value already loaded.
2478 *
2479 * The caller will not modify the returned register, it must be considered
2480 * read-only. Free using iemNativeRegFreeTmpImm.
2481 *
2482 * @returns The host register number; throws VBox status code on failure, so no
2483 * need to check the return value.
2484 * @param pReNative The native recompile state.
2485 * @param poff Pointer to the variable with the code buffer position.
2486 * @param uImm The immediate value that the register must hold upon
2487 * return.
2488 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2489 * registers (@c true, default) or the other way around
2490 * (@c false).
2491 *
2492 * @note Reusing immediate values has not been implemented yet.
2493 */
2494DECL_HIDDEN_THROW(uint8_t)
2495iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2496{
2497 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2498 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2499 return idxReg;
2500}
2501
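/*
 * Usage sketch for iemNativeRegAllocTmpImm(); the immediate is an arbitrary
 * illustration:
 * @code
 *     uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000));
 *     // ... use idxRegImm read-only ...
 *     iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 * @endcode
 */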
2502
2503/**
2504 * Marks host register @a idxHstReg as containing a shadow copy of guest
2505 * register @a enmGstReg.
2506 *
2507 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2508 * host register before calling.
2509 */
2510DECL_FORCE_INLINE(void)
2511iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2512{
2513 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2514
2515 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2516 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2517 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2518 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2519#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2520 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2521 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2522#else
2523 RT_NOREF(off);
2524#endif
2525}
2526
2527
2528/**
2529 * Clear any guest register shadow claims from @a idxHstReg.
2530 *
2531 * The register does not need to be shadowing any guest registers.
2532 */
2533DECL_FORCE_INLINE(void)
2534iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2535{
2536 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2537 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2538 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2539 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2540
2541#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2542 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2543 if (fGstRegs)
2544 {
2545 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2546 while (fGstRegs)
2547 {
2548 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2549 fGstRegs &= ~RT_BIT_64(iGstReg);
2550 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2551 }
2552 }
2553#else
2554 RT_NOREF(off);
2555#endif
2556
2557 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2558 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2559 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2560}
2561
2562
2563/**
2564 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2565 * to @a idxRegTo.
2566 */
2567DECL_FORCE_INLINE(void)
2568iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2569 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2570{
2571 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2572 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2573 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2574 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2575 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2576
2577 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2578 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2579 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2580#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2581 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2582 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2583#else
2584 RT_NOREF(off);
2585#endif
2586}
2587
2588
2589/**
2590 * Allocates a temporary host general purpose register for keeping a guest
2591 * register value.
2592 *
2593 * Since we may already have a register holding the guest register value,
2594 * code will be emitted to do the loading if that's not the case. Code may also
2595 * be emitted if we have to free up a register to satisfy the request.
2596 *
2597 * @returns The host register number; throws VBox status code on failure, so no
2598 * need to check the return value.
2599 * @param pReNative The native recompile state.
2600 * @param poff Pointer to the variable with the code buffer
2601 * position. This will be updated if we need to move a
2602 * variable from register to stack in order to satisfy
2603 * the request.
2604 * @param enmGstReg The guest register that is to be updated.
2605 * @param enmIntendedUse How the caller will be using the host register.
2606 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2607 */
2608DECL_HIDDEN_THROW(uint8_t)
2609iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2610 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2611{
2612 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2613#ifdef LOG_ENABLED
2614 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2615#endif
2616
2617 /*
2618 * First check if the guest register value is already in a host register.
2619 */
2620 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2621 {
2622 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2623 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2624 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2625 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2626
2627 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2628 {
2629 /*
2630 * If the register will trash the guest shadow copy, try find a
2631 * completely unused register we can use instead. If that fails,
2632 * we need to disassociate the host reg from the guest reg.
2633 */
2634 /** @todo would be nice to know if preserving the register is in any way helpful. */
2635 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2636 && ( ~pReNative->Core.bmHstRegs
2637 & ~pReNative->Core.bmHstRegsWithGstShadow
2638 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2639 {
2640 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2641
2642 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2643
2644 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2645 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2646 g_apszIemNativeHstRegNames[idxRegNew]));
2647 idxReg = idxRegNew;
2648 }
2649 else
2650 {
2651 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2652 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2653 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2654 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2655 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2656 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2657 else
2658 {
2659 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2660 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2661 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2662 }
2663 }
2664 }
2665 else
2666 {
2667 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2668 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2669
2670 /*
2671 * Allocate a new register, copy the value and, if updating, the
2672 * guest shadow copy assignment to the new register.
2673 */
2674 /** @todo share register for readonly access. */
2675 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2676
2677 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2678
2679 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2680 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2681 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2682 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2683 else
2684 {
2685 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2686 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2687 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2688 g_apszIemNativeHstRegNames[idxRegNew]));
2689 }
2690 idxReg = idxRegNew;
2691 }
2692
2693#ifdef VBOX_STRICT
2694 /* Strict builds: Check that the value is correct. */
2695 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2696#endif
2697
2698 return idxReg;
2699 }
2700
2701 /*
2702 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
2703 */
2704 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2705
2706 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2707
2708 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2709 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2710 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2711 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2712
2713 return idxRegNew;
2714}
2715
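/*
 * Sketch of fetching a guest GPR via iemNativeRegAllocTmpForGuestReg(); the
 * register choice and the IEMNATIVEGSTREG arithmetic are illustrative
 * assumptions based on the shadow info table comments above:
 * @code
 *     uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                               (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *     // ... emit code that reads and updates idxRegRax ...
 * @endcode
 */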
2716
2717/**
2718 * Allocates a temporary host general purpose register that already holds the
2719 * given guest register value.
2720 *
2721 * The use case for this function is code paths where the shadowing state cannot
2722 * be modified due to branching and such. This will fail if we don't have a
2723 * current shadow copy handy or if it's incompatible. The only code that will
2724 * be emitted here is value checking code in strict builds.
2725 *
2726 * The intended use can only be readonly!
2727 *
2728 * @returns The host register number, UINT8_MAX if not present.
2729 * @param pReNative The native recompile state.
2730 * @param poff Pointer to the instruction buffer offset.
2731 * Will be updated in strict builds if a register is
2732 * found.
2733 * @param enmGstReg The guest register that is to be read.
2734 * @note In strict builds, this may throw instruction buffer growth failures.
2735 * Non-strict builds will not throw anything.
2736 * @sa iemNativeRegAllocTmpForGuestReg
2737 */
2738DECL_HIDDEN_THROW(uint8_t)
2739iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2740{
2741 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2742
2743 /*
2744 * First check if the guest register value is already in a host register.
2745 */
2746 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2747 {
2748 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2749 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2750 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2751 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2752
2753 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2754 {
2755 /*
2756 * We only do readonly use here, so easy compared to the other
2757 * variant of this code.
2758 */
2759 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2760 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2761 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2762 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2763 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2764
2765#ifdef VBOX_STRICT
2766 /* Strict builds: Check that the value is correct. */
2767 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2768#else
2769 RT_NOREF(poff);
2770#endif
2771 return idxReg;
2772 }
2773 }
2774
2775 return UINT8_MAX;
2776}
2777
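/*
 * Sketch of the read-only fast path this function enables; the fallback shown
 * is an assumption about typical caller behaviour:
 * @code
 *     uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, enmGstReg);
 *     if (idxReg != UINT8_MAX)
 *     {
 *         // ... use idxReg read-only and free it with iemNativeRegFreeTmp ...
 *     }
 *     else
 *     {
 *         // No usable shadow copy; get the value another way without
 *         // disturbing the shadowing state.
 *     }
 * @endcode
 */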
2778
2779DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2780
2781
2782/**
2783 * Allocates argument registers for a function call.
2784 *
2785 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2786 * need to check the return value.
2787 * @param pReNative The native recompile state.
2788 * @param off The current code buffer offset.
2789 * @param cArgs The number of arguments the function call takes.
2790 */
2791DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2792{
2793 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2794 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2795 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2796 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2797
2798 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2799 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2800 else if (cArgs == 0)
2801 return off;
2802
2803 /*
2804 * Do we get lucky and all registers are free and not shadowing anything?
2805 */
2806 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2807 for (uint32_t i = 0; i < cArgs; i++)
2808 {
2809 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2810 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2811 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2812 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2813 }
2814 /*
2815 * Okay, not lucky so we have to free up the registers.
2816 */
2817 else
2818 for (uint32_t i = 0; i < cArgs; i++)
2819 {
2820 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2821 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2822 {
2823 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2824 {
2825 case kIemNativeWhat_Var:
2826 {
2827 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2828 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2829 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2830 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2831 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2832
2833 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2834 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2835 else
2836 {
2837 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2838 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2839 }
2840 break;
2841 }
2842
2843 case kIemNativeWhat_Tmp:
2844 case kIemNativeWhat_Arg:
2845 case kIemNativeWhat_rc:
2846 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2847 default:
2848 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2849 }
2850
2851 }
2852 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2853 {
2854 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2855 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2856 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2857 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2858 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2859 }
2860 else
2861 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2862 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2863 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2864 }
2865 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2866 return off;
2867}
2868
2869
2870DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2871
2872
2873#if 0
2874/**
2875 * Frees a register assignment of any type.
2876 *
2877 * @param pReNative The native recompile state.
2878 * @param idxHstReg The register to free.
2879 *
2880 * @note Does not update variables.
2881 */
2882DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2883{
2884 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2885 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2886 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2887 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2888 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2889 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2890 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2891 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2892 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2893 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2894 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2895 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2896 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2897 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2898
2899 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2900 /* no flushing, right:
2901 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2902 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2903 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2904 */
2905}
2906#endif
2907
2908
2909/**
2910 * Frees a temporary register.
2911 *
2912 * Any shadow copies of guest registers assigned to the host register will not
2913 * be flushed by this operation.
2914 */
2915DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2916{
2917 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2918 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2919 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2920 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2921 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2922}
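/*
 * Minimal usage sketch for the temporary register helpers (illustrative only;
 * assumes the usual pattern of threading the code buffer offset through):
 */
#if 0
static uint32_t iemNativeSketchTmpRegUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Allocate a scratch host register, emit some code using it, then give it
       back.  iemNativeRegFreeTmp leaves any guest shadow bookkeeping alone. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif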
2923
2924
2925/**
2926 * Frees a temporary immediate register.
2927 *
2928  * It is assumed that the call has not modified the register, so it still holds
2929 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2930 */
2931DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2932{
2933 iemNativeRegFreeTmp(pReNative, idxHstReg);
2934}
2935
2936
2937/**
2938 * Called right before emitting a call instruction to move anything important
2939 * out of call-volatile registers, free and flush the call-volatile registers,
2940 * optionally freeing argument variables.
2941 *
2942  * @returns New code buffer offset; throws VBox status code on failure.
2943 * @param pReNative The native recompile state.
2944 * @param off The code buffer offset.
2945 * @param cArgs The number of arguments the function call takes.
2946 * It is presumed that the host register part of these have
2947 * been allocated as such already and won't need moving,
2948 * just freeing.
2949 */
2950DECL_HIDDEN_THROW(uint32_t)
2951iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2952{
2953 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
2954
2955 /*
2956 * Move anything important out of volatile registers.
2957 */
2958 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2959 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2960 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2961#ifdef IEMNATIVE_REG_FIXED_TMP0
2962 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2963#endif
2964 & ~g_afIemNativeCallRegs[cArgs];
2965
2966 fRegsToMove &= pReNative->Core.bmHstRegs;
2967 if (!fRegsToMove)
2968 { /* likely */ }
2969 else
2970 while (fRegsToMove != 0)
2971 {
2972 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2973 fRegsToMove &= ~RT_BIT_32(idxReg);
2974
2975 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2976 {
2977 case kIemNativeWhat_Var:
2978 {
2979 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2980 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2981 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2982 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2983 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2984 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2985 else
2986 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2987 continue;
2988 }
2989
2990 case kIemNativeWhat_Arg:
2991 AssertMsgFailed(("What?!?: %u\n", idxReg));
2992 continue;
2993
2994 case kIemNativeWhat_rc:
2995 case kIemNativeWhat_Tmp:
2996 AssertMsgFailed(("Missing free: %u\n", idxReg));
2997 continue;
2998
2999 case kIemNativeWhat_FixedTmp:
3000 case kIemNativeWhat_pVCpuFixed:
3001 case kIemNativeWhat_pCtxFixed:
3002 case kIemNativeWhat_FixedReserved:
3003 case kIemNativeWhat_Invalid:
3004 case kIemNativeWhat_End:
3005 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3006 }
3007 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3008 }
3009
3010 /*
3011 * Do the actual freeing.
3012 */
3013 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3014
3015 /* If there are guest register shadows in any call-volatile register, we
3016        have to clear the corresponding guest register masks for each register. */
3017 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3018 if (fHstRegsWithGstShadow)
3019 {
3020 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3021 do
3022 {
3023 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3024             fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3025
3026 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3027 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3028 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3029 } while (fHstRegsWithGstShadow != 0);
3030 }
3031
3032 return off;
3033}
3034
3035
3036/**
3037 * Flushes a set of guest register shadow copies.
3038 *
3039 * This is usually done after calling a threaded function or a C-implementation
3040 * of an instruction.
3041 *
3042 * @param pReNative The native recompile state.
3043 * @param fGstRegs Set of guest registers to flush.
3044 */
3045DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3046{
3047 /*
3048 * Reduce the mask by what's currently shadowed
3049 */
3050 fGstRegs &= pReNative->Core.bmGstRegShadows;
3051 if (fGstRegs)
3052 {
3053 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3054 if (pReNative->Core.bmGstRegShadows)
3055 {
3056 /*
3057 * Partial.
3058 */
3059 do
3060 {
3061 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3062 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3063 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3064 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3065 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3066
3067 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3068 fGstRegs &= ~fInThisHstReg;
3069                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3070 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3071 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3072 } while (fGstRegs != 0);
3073 }
3074 else
3075 {
3076 /*
3077 * Clear all.
3078 */
3079 do
3080 {
3081 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3082 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3083 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3084 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3085 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3086
3087 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3088 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3089 } while (fGstRegs != 0);
3090 pReNative->Core.bmHstRegsWithGstShadow = 0;
3091 }
3092 }
3093}
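/*
 * Call sketches (illustrative only): flush everything before calling out to C
 * code, or flush just a subset given as a guest register mask.
 */
#if 0
    /* All guest shadow copies: */
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
    /* Only the PC and EFLAGS shadows: */
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
#endif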
3094
3095
3096/**
3097 * Flushes any delayed guest register writes.
3098 *
3099 * This must be called prior to calling CImpl functions and any helpers that use
3100 * the guest state (like raising exceptions) and such.
3101 *
3102 * This optimization has not yet been implemented. The first target would be
3103 * RIP updates, since these are the most common ones.
3104 */
3105DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3106{
3107 RT_NOREF(pReNative, off);
3108 return off;
3109}
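/*
 * Rough sketch of what a delayed-RIP-write flush could look like once the
 * optimization is implemented.  The bmGstRegPendingWrites bitmap used below is
 * purely hypothetical and does not exist in the current state structure.
 */
#if 0
    if (pReNative->Core.bmGstRegPendingWrites & RT_BIT_64(kIemNativeGstReg_Pc)) /* hypothetical bitmap */
    {
        uint8_t const idxPcReg = pReNative->Core.aidxGstRegShadows[kIemNativeGstReg_Pc];
        off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
        pReNative->Core.bmGstRegPendingWrites &= ~RT_BIT_64(kIemNativeGstReg_Pc);
    }
#endif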
3110
3111
3112/*********************************************************************************************************************************
3113* Code Emitters (larger snippets) *
3114*********************************************************************************************************************************/
3115
3116/**
3117 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3118 * extending to 64-bit width.
3119 *
3120  * @returns New code buffer offset on success; throws VBox status code on failure.
3121  * @param   pReNative   The native recompile state.
3122 * @param off The current code buffer position.
3123 * @param idxHstReg The host register to load the guest register value into.
3124 * @param enmGstReg The guest register to load.
3125 *
3126 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3127 * that is something the caller needs to do if applicable.
3128 */
3129DECL_HIDDEN_THROW(uint32_t)
3130iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3131{
3132 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3133 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3134
3135 switch (g_aGstShadowInfo[enmGstReg].cb)
3136 {
3137 case sizeof(uint64_t):
3138 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3139 case sizeof(uint32_t):
3140 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3141 case sizeof(uint16_t):
3142 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3143#if 0 /* not present in the table. */
3144 case sizeof(uint8_t):
3145 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3146#endif
3147 default:
3148 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3149 }
3150}
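/*
 * Typical call sketch (illustrative only); as noted above, recording the
 * shadow association afterwards is the caller's job.
 */
#if 0
    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
#endif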
3151
3152
3153#ifdef VBOX_STRICT
3154/**
3155  * Emits code that checks that the content of register @a idxReg is the same
3156  * as what's in the guest register @a enmGstReg, emitting a breakpoint
3157  * instruction if that's not the case.
3158 *
3159 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3160 * Trashes EFLAGS on AMD64.
3161 */
3162static uint32_t
3163iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3164{
3165# ifdef RT_ARCH_AMD64
3166 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3167
3168 /* cmp reg, [mem] */
3169 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3170 {
3171 if (idxReg >= 8)
3172 pbCodeBuf[off++] = X86_OP_REX_R;
3173 pbCodeBuf[off++] = 0x38;
3174 }
3175 else
3176 {
3177 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3178 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3179 else
3180 {
3181 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3182 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3183 else
3184 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3185 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3186 if (idxReg >= 8)
3187 pbCodeBuf[off++] = X86_OP_REX_R;
3188 }
3189 pbCodeBuf[off++] = 0x39;
3190 }
3191 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3192
3193 /* je/jz +1 */
3194 pbCodeBuf[off++] = 0x74;
3195 pbCodeBuf[off++] = 0x01;
3196
3197 /* int3 */
3198 pbCodeBuf[off++] = 0xcc;
3199
3200 /* For values smaller than the register size, we must check that the rest
3201 of the register is all zeros. */
3202 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3203 {
3204 /* test reg64, imm32 */
3205 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3206 pbCodeBuf[off++] = 0xf7;
3207 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3208 pbCodeBuf[off++] = 0;
3209 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3210 pbCodeBuf[off++] = 0xff;
3211 pbCodeBuf[off++] = 0xff;
3212
3213 /* je/jz +1 */
3214 pbCodeBuf[off++] = 0x74;
3215 pbCodeBuf[off++] = 0x01;
3216
3217 /* int3 */
3218 pbCodeBuf[off++] = 0xcc;
3219 }
3220 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3221 {
3222 /* rol reg64, 32 */
3223 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3224 pbCodeBuf[off++] = 0xc1;
3225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3226 pbCodeBuf[off++] = 32;
3227
3228 /* test reg32, ffffffffh */
3229 if (idxReg >= 8)
3230 pbCodeBuf[off++] = X86_OP_REX_B;
3231 pbCodeBuf[off++] = 0xf7;
3232 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3233 pbCodeBuf[off++] = 0xff;
3234 pbCodeBuf[off++] = 0xff;
3235 pbCodeBuf[off++] = 0xff;
3236 pbCodeBuf[off++] = 0xff;
3237
3238 /* je/jz +1 */
3239 pbCodeBuf[off++] = 0x74;
3240 pbCodeBuf[off++] = 0x01;
3241
3242 /* int3 */
3243 pbCodeBuf[off++] = 0xcc;
3244
3245 /* rol reg64, 32 */
3246 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3247 pbCodeBuf[off++] = 0xc1;
3248 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3249 pbCodeBuf[off++] = 32;
3250 }
3251
3252# elif defined(RT_ARCH_ARM64)
3253 /* mov TMP0, [gstreg] */
3254 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3255
3256 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3257 /* sub tmp0, tmp0, idxReg */
3258 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3259 /* cbz tmp0, +1 */
3260 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3261 /* brk #0x1000+enmGstReg */
3262 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3263
3264# else
3265# error "Port me!"
3266# endif
3267 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3268 return off;
3269}
3270#endif /* VBOX_STRICT */
3271
3272
3273
3274/**
3275  * Emits code for checking the return code of a call and rcPassUp, returning
3276  * from the generated code if either is non-zero.
3277 */
3278DECL_HIDDEN_THROW(uint32_t)
3279iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3280{
3281#ifdef RT_ARCH_AMD64
3282 /*
3283 * AMD64: eax = call status code.
3284 */
3285
3286 /* edx = rcPassUp */
3287 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3288# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3289 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3290# endif
3291
3292 /* edx = eax | rcPassUp */
3293 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3294 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3295 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3297
3298 /* Jump to non-zero status return path. */
3299 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3300
3301 /* done. */
3302
3303#elif RT_ARCH_ARM64
3304 /*
3305 * ARM64: w0 = call status code.
3306 */
3307 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3308 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3309
3310 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3311
3312 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3313
3314 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3315 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3316 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3317
3318#else
3319# error "port me"
3320#endif
3321 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3322 return off;
3323}
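/*
 * In C terms the emitted sequence amounts to roughly the following (sketch;
 * rcCall is just a name for the status left in eax / w0 by the call, and the
 * generated code branches to the NonZeroRetOrPassUp label rather than
 * returning directly):
 */
#if 0
    if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
        goto NonZeroRetOrPassUp;    /* handled by iemNativeEmitRcFiddling() */
#endif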
3324
3325
3326/**
3327 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3328 * raising a \#GP(0) if it isn't.
3329 *
3330  * @returns New code buffer offset; throws VBox status code on error.
3331 * @param pReNative The native recompile state.
3332 * @param off The code buffer offset.
3333 * @param idxAddrReg The host register with the address to check.
3334 * @param idxInstr The current instruction.
3335 */
3336DECL_HIDDEN_THROW(uint32_t)
3337iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3338{
3339 RT_NOREF(idxInstr);
3340
3341 /*
3342 * Make sure we don't have any outstanding guest register writes as we may
3343      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3344 */
3345 off = iemNativeRegFlushPendingWrites(pReNative, off);
3346
3347#ifdef RT_ARCH_AMD64
3348 /*
3349 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3350 * return raisexcpt();
3351      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3352 */
3353 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3354
3355 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3356 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3357 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3358 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3359
3360# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3361 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3362# else
3363 uint32_t const offFixup = off;
3364 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3365 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3366 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3367 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3368# endif
3369
3370 iemNativeRegFreeTmp(pReNative, iTmpReg);
3371
3372#elif defined(RT_ARCH_ARM64)
3373 /*
3374 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3375 * return raisexcpt();
3376 * ----
3377 * mov x1, 0x800000000000
3378 * add x1, x0, x1
3379 * cmp xzr, x1, lsr 48
3380 * and either:
3381 * b.ne .Lraisexcpt
3382 * or:
3383 * b.eq .Lnoexcept
3384 * movz x1, #instruction-number
3385 * b .Lraisexcpt
3386 * .Lnoexcept:
3387 */
3388 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3389
3390 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3391 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3392     off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3393
3394# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3395 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3396# else
3397 uint32_t const offFixup = off;
3398 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3399 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3400 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3401 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3402# endif
3403
3404 iemNativeRegFreeTmp(pReNative, iTmpReg);
3405
3406#else
3407# error "Port me"
3408#endif
3409 return off;
3410}
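/*
 * Worked example of the canonicality predicate used by both variants above:
 *      ((uAddr + UINT64_C(0x800000000000)) >> 48) == 0   iff uAddr is canonical.
 *      0x00007fffffffffff + 0x800000000000 = 0x0000ffffffffffff  ->  >> 48 = 0  (canonical)
 *      0x0000800000000000 + 0x800000000000 = 0x0001000000000000  ->  >> 48 = 1  (raise #GP(0))
 *      0xffff800000000000 + 0x800000000000 = 0x0000000000000000  ->  >> 48 = 0  (canonical)
 */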
3411
3412
3413/**
3414 * Emits code to check if the content of @a idxAddrReg is within the limit of
3415 * idxSegReg, raising a \#GP(0) if it isn't.
3416 *
3417 * @returns New code buffer offset; throws VBox status code on error.
3418 * @param pReNative The native recompile state.
3419 * @param off The code buffer offset.
3420 * @param idxAddrReg The host register (32-bit) with the address to
3421 * check.
3422 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3423 * against.
3424 * @param idxInstr The current instruction.
3425 */
3426DECL_HIDDEN_THROW(uint32_t)
3427iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3428 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3429{
3430 /*
3431 * Make sure we don't have any outstanding guest register writes as we may
3432      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3433 */
3434 off = iemNativeRegFlushPendingWrites(pReNative, off);
3435
3436 /** @todo implement expand down/whatnot checking */
3437 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3438
3439 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3440 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3441 kIemNativeGstRegUse_ForUpdate);
3442
3443 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3444
3445#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3446 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3447 RT_NOREF(idxInstr);
3448#else
3449 uint32_t const offFixup = off;
3450 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3451 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3452 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3453 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3454#endif
3455
3456 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3457 return off;
3458}
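/*
 * C-level sketch of the emitted check (illustrative; assumes the CS limit is
 * reachable as cpum.GstCtx.cs.u32Limit and ignores the expand-down todo above):
 */
#if 0
    if (uAddr32 > pVCpu->cpum.GstCtx.cs.u32Limit)
        /* branch to the RaiseGp0 label, loading the instruction number first when counting */;
#endif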
3459
3460
3461/**
3462 * Emits a call to a CImpl function or something similar.
3463 */
3464 static uint32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3465 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3466 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3467{
3468 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3469 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3470
3471 /*
3472 * Load the parameters.
3473 */
3474#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3475     /* Special handling for the hidden VBOXSTRICTRC return pointer. */
3476 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3477 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3478 if (cAddParams > 0)
3479 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3480 if (cAddParams > 1)
3481 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3482 if (cAddParams > 2)
3483 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3484 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3485
3486#else
3487 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3488 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3489 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3490 if (cAddParams > 0)
3491 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3492 if (cAddParams > 1)
3493 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3494 if (cAddParams > 2)
3495# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3496 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3497# else
3498 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3499# endif
3500#endif
3501
3502 /*
3503 * Make the call.
3504 */
3505 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3506
3507#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3508 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3509#endif
3510
3511 /*
3512 * Check the status code.
3513 */
3514 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3515}
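/*
 * What the generated code boils down to, sketched in C (illustrative; the
 * Windows + VBOXSTRICTRC hidden-return-pointer special case is left out):
 */
#if 0
    rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);  /* unused parameters simply aren't loaded */
    /* ... followed by the shared rc/rcPassUp check from iemNativeEmitCheckCallRetAndPassUp(). */
#endif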
3516
3517
3518/**
3519 * Emits a call to a threaded worker function.
3520 */
3521static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3522{
3523 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3524 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3525 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3526
3527#ifdef RT_ARCH_AMD64
3528 /* Load the parameters and emit the call. */
3529# ifdef RT_OS_WINDOWS
3530# ifndef VBOXSTRICTRC_STRICT_ENABLED
3531 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3532 if (cParams > 0)
3533 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3534 if (cParams > 1)
3535 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3536 if (cParams > 2)
3537 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3538# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3539 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3540 if (cParams > 0)
3541 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3542 if (cParams > 1)
3543 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3544 if (cParams > 2)
3545 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3546 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3547 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3548# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3549# else
3550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3551 if (cParams > 0)
3552 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3553 if (cParams > 1)
3554 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3555 if (cParams > 2)
3556 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3557# endif
3558
3559 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3560
3561# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3562 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3563# endif
3564
3565#elif RT_ARCH_ARM64
3566 /*
3567 * ARM64:
3568 */
3569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3570 if (cParams > 0)
3571 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3572 if (cParams > 1)
3573 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3574 if (cParams > 2)
3575 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3576
3577 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3578
3579#else
3580# error "port me"
3581#endif
3582
3583 /*
3584 * Check the status code.
3585 */
3586 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3587
3588 return off;
3589}
3590
3591
3592/**
3593 * Emits the code at the RaiseGP0 label.
3594 */
3595static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3596{
3597 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3598 if (idxLabel != UINT32_MAX)
3599 {
3600 iemNativeLabelDefine(pReNative, idxLabel, off);
3601
3602 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3603 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3604#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3605 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3606#endif
3607 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3608
3609 /* jump back to the return sequence. */
3610 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3611 }
3612 return off;
3613}
3614
3615
3616/**
3617 * Emits the code at the ReturnWithFlags label (returns
3618 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3619 */
3620static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3621{
3622 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3623 if (idxLabel != UINT32_MAX)
3624 {
3625 iemNativeLabelDefine(pReNative, idxLabel, off);
3626
3627 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3628
3629 /* jump back to the return sequence. */
3630 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3631 }
3632 return off;
3633}
3634
3635
3636/**
3637 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3638 */
3639static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3640{
3641 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3642 if (idxLabel != UINT32_MAX)
3643 {
3644 iemNativeLabelDefine(pReNative, idxLabel, off);
3645
3646 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3647
3648 /* jump back to the return sequence. */
3649 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3650 }
3651 return off;
3652}
3653
3654
3655/**
3656 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3657 */
3658static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3659{
3660 /*
3661 * Generate the rc + rcPassUp fiddling code if needed.
3662 */
3663 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3664 if (idxLabel != UINT32_MAX)
3665 {
3666 iemNativeLabelDefine(pReNative, idxLabel, off);
3667
3668 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3669#ifdef RT_ARCH_AMD64
3670# ifdef RT_OS_WINDOWS
3671# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3672 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3673# endif
3674 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3675 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3676# else
3677 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3678 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3679# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3680 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3681# endif
3682# endif
3683# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3684 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3685# endif
3686
3687#else
3688 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3689 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3690 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3691#endif
3692
3693 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3694 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3695 }
3696 return off;
3697}
3698
3699
3700/**
3701 * Emits a standard epilog.
3702 */
3703static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3704{
3705 *pidxReturnLabel = UINT32_MAX;
3706
3707 /*
3708 * Successful return, so clear the return register (eax, w0).
3709 */
3710 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
3711
3712 /*
3713 * Define label for common return point.
3714 */
3715 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3716 *pidxReturnLabel = idxReturn;
3717
3718 /*
3719 * Restore registers and return.
3720 */
3721#ifdef RT_ARCH_AMD64
3722 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3723
3724     /* Reposition rsp at the r15 restore point. */
3725 pbCodeBuf[off++] = X86_OP_REX_W;
3726 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3727 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3728 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3729
3730 /* Pop non-volatile registers and return */
3731 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3732 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3733 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3734 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3735 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3736 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3737 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3738 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3739# ifdef RT_OS_WINDOWS
3740 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3741 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3742# endif
3743 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3744 pbCodeBuf[off++] = 0xc9; /* leave */
3745 pbCodeBuf[off++] = 0xc3; /* ret */
3746 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3747
3748#elif RT_ARCH_ARM64
3749 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3750
3751     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3752 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3753 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3754 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3755 IEMNATIVE_FRAME_VAR_SIZE / 8);
3756 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3757 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3758 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3759 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3760 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3761 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3762 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3763 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3764 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3765 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3766 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3767 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3768
3769 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3770 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3771 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3772 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3773
3774 /* retab / ret */
3775# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3776 if (1)
3777 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3778 else
3779# endif
3780 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3781
3782#else
3783# error "port me"
3784#endif
3785 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3786
3787 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3788}
3789
3790
3791/**
3792 * Emits a standard prolog.
3793 */
3794static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3795{
3796#ifdef RT_ARCH_AMD64
3797 /*
3798 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3799 * reserving 64 bytes for stack variables plus 4 non-register argument
3800 * slots. Fixed register assignment: xBX = pReNative;
3801 *
3802 * Since we always do the same register spilling, we can use the same
3803 * unwind description for all the code.
3804 */
3805 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3806 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3807 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3808 pbCodeBuf[off++] = 0x8b;
3809 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3810 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3811 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3812# ifdef RT_OS_WINDOWS
3813 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3814 pbCodeBuf[off++] = 0x8b;
3815 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3816 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3817 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3818# else
3819 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3820 pbCodeBuf[off++] = 0x8b;
3821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3822# endif
3823 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3824 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3825 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3826 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3827 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3828 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3829 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3830 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3831
3832 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3833 X86_GREG_xSP,
3834 IEMNATIVE_FRAME_ALIGN_SIZE
3835 + IEMNATIVE_FRAME_VAR_SIZE
3836 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3837 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3838 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3839 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3840 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3841
3842#elif RT_ARCH_ARM64
3843 /*
3844 * We set up a stack frame exactly like on x86, only we have to push the
3845      * return address ourselves here.  We save all non-volatile registers.
3846 */
3847 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3848
3849 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further; we've been unable
3850                      *       to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
3851                      *       definitely the dwarf stepping code, but so far it's been very tedious to figure out whether it's
3852                      *       in any way conditional, so we just emit these instructions now and hope for the best... */
3853 /* pacibsp */
3854 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3855# endif
3856
3857     /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
3858 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3859 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3860 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3861 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3862 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3863 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3864 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3865 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3866 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3867 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3868 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3869 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3870 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3871 /* Save the BP and LR (ret address) registers at the top of the frame. */
3872 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3873 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3874 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3875 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3877 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3878
3879 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3880 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3881
3882 /* mov r28, r0 */
3883 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3884 /* mov r27, r1 */
3885 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3886
3887#else
3888# error "port me"
3889#endif
3890 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3891 return off;
3892}
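/*
 * Rough AMD64 frame layout produced by the prolog above (illustrative; the
 * offsets simply follow from the push order):
 *      rbp+08h  return address
 *      rbp+00h  saved rbp
 *      rbp-08h  saved rbx (IEMNATIVE_REG_FIXED_PVMCPU)
 *      ...      saved rsi/rdi (Windows only), then r12, r13, r14, r15
 *               (the last push sits at IEMNATIVE_FP_OFF_LAST_PUSH)
 *      below    alignment padding, the variable area and stack/shadow argument slots
 */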
3893
3894
3895
3896
3897/*********************************************************************************************************************************
3898* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
3899*********************************************************************************************************************************/
3900
3901#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3902 { \
3903 pReNative->fMc = (a_fMcFlags); \
3904 pReNative->fCImpl = (a_fCImplFlags); \
3905 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
3906
3907/** We have to get to the end in recompilation mode, as otherwise we won't
3908 * generate code for all the IEM_MC_IF_XXX branches. */
3909#define IEM_MC_END() \
3910 } return off
3911
3912
3913
3914/*********************************************************************************************************************************
3915* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
3916*********************************************************************************************************************************/
3917
3918#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3919 pReNative->fMc = 0; \
3920 pReNative->fCImpl = (a_fFlags); \
3921 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3922
3923
3924#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3925 pReNative->fMc = 0; \
3926 pReNative->fCImpl = (a_fFlags); \
3927 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3928
3929DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3930 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3931{
3932 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3933}
3934
3935
3936#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3937 pReNative->fMc = 0; \
3938 pReNative->fCImpl = (a_fFlags); \
3939 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3940
3941DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3942 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3943{
3944 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3945}
3946
3947
3948#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3949 pReNative->fMc = 0; \
3950 pReNative->fCImpl = (a_fFlags); \
3951 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3952
3953DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3954 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
3955 uint64_t uArg2)
3956{
3957 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3958}
3959
3960
3961
3962/*********************************************************************************************************************************
3963* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
3964*********************************************************************************************************************************/
3965
3966/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
3967 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
3968DECL_INLINE_THROW(uint32_t)
3969iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3970{
3971 /*
3972      * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
3973      * return with a special status code and make the execution loop deal with
3974      * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
3975      * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
3976      * could continue w/o interruption, it will probably drop into the
3977      * debugger, so it's not worth the effort of trying to service it here and
3978      * we just lump it in with the handling of the others.
3979      *
3980      * To simplify the code and the register state management even more (wrt
3981      * the immediate in the AND operation), we always update the flags and skip
3982      * the extra check and its associated conditional jump.
3983 */
3984 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
3985 <= UINT32_MAX);
3986 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3987 kIemNativeGstRegUse_ForUpdate);
3988 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
3989 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
3990 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
3991 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
3992 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
3993
3994 /* Free but don't flush the EFLAGS register. */
3995 iemNativeRegFreeTmp(pReNative, idxEflReg);
3996
3997 return off;
3998}
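/*
 * The emitted sequence corresponds roughly to this C (sketch; the eflags.u
 * accessor is an assumption here):
 */
#if 0
    uint32_t fEFlags = pVCpu->cpum.GstCtx.eflags.u;
    if (fEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;   /* via the ReturnWithFlags label */
    fEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
    pVCpu->cpum.GstCtx.eflags.u = fEFlags;
#endif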
3999
4000
4001#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4002 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4003
4004#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4005 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4006 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4007
4008/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4009DECL_INLINE_THROW(uint32_t)
4010iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4011{
4012 /* Allocate a temporary PC register. */
4013 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4014
4015 /* Perform the addition and store the result. */
4016 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4017 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4018
4019 /* Free but don't flush the PC register. */
4020 iemNativeRegFreeTmp(pReNative, idxPcReg);
4021
4022 return off;
4023}
4024
4025
4026#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4027 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4028
4029#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4030 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4031 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4032
4033/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4034DECL_INLINE_THROW(uint32_t)
4035iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4036{
4037 /* Allocate a temporary PC register. */
4038 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4039
4040 /* Perform the addition and store the result. */
4041 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4042 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4043
4044 /* Free but don't flush the PC register. */
4045 iemNativeRegFreeTmp(pReNative, idxPcReg);
4046
4047 return off;
4048}
4049
4050
4051#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4052 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4053
4054#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4055 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4056 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4057
4058/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4059DECL_INLINE_THROW(uint32_t)
4060iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4061{
4062 /* Allocate a temporary PC register. */
4063 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4064
4065 /* Perform the addition and store the result. */
4066 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4067 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4068 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4069
4070 /* Free but don't flush the PC register. */
4071 iemNativeRegFreeTmp(pReNative, idxPcReg);
4072
4073 return off;
4074}
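/*
 * The three advance emitters above are the recompiled equivalents of roughly
 * the following (sketch):
 */
#if 0
    pVCpu->cpum.GstCtx.rip =            pVCpu->cpum.GstCtx.rip + cbInstr;    /* 64-bit RIP */
    pVCpu->cpum.GstCtx.rip = (uint32_t)(pVCpu->cpum.GstCtx.rip + cbInstr);   /* 32-bit EIP, zero extended */
    pVCpu->cpum.GstCtx.rip = (uint16_t)(pVCpu->cpum.GstCtx.rip + cbInstr);   /* 16-bit IP,  zero extended */
#endif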
4075
4076
4077
4078/*********************************************************************************************************************************
4079* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4080*********************************************************************************************************************************/
4081
4082#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4083 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4084 (a_enmEffOpSize), pCallEntry->idxInstr)
4085
4086#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4087 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4088 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4089
4090#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4091 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4092 IEMMODE_16BIT, pCallEntry->idxInstr)
4093
4094#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4095 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4096 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4097
4098#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4099 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4100 IEMMODE_64BIT, pCallEntry->idxInstr)
4101
4102#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4103 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4104 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4105
4106/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4107 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4108 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4109DECL_INLINE_THROW(uint32_t)
4110iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4111 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4112{
4113 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4114
4115 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4116 off = iemNativeRegFlushPendingWrites(pReNative, off);
4117
4118 /* Allocate a temporary PC register. */
4119 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4120
4121 /* Perform the addition. */
4122 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4123
4124 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4125 {
4126 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4127 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4128 }
4129 else
4130 {
4131 /* Just truncate the result to 16-bit IP. */
4132 Assert(enmEffOpSize == IEMMODE_16BIT);
4133 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4134 }
4135 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4136
4137 /* Free but don't flush the PC register. */
4138 iemNativeRegFreeTmp(pReNative, idxPcReg);
4139
4140 return off;
4141}
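/*
 * In C terms this is roughly the following (sketch, using the usual IEM
 * helpers for the canonical check and #GP(0)):
 */
#if 0
    uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
    if (enmEffOpSize == IEMMODE_64BIT)
    {
        if (!IEM_IS_CANONICAL(uNewRip))
            return iemRaiseGeneralProtectionFault0(pVCpu);
    }
    else
        uNewRip &= UINT16_MAX;  /* 16-bit operand size: wrap to IP */
    pVCpu->cpum.GstCtx.rip = uNewRip;
#endif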
4142
4143
4144#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4145 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4146 (a_enmEffOpSize), pCallEntry->idxInstr)
4147
4148#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4149 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4150 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4151
4152#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4153 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4154 IEMMODE_16BIT, pCallEntry->idxInstr)
4155
4156#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4157 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4158 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4159
4160#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4161 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4162 IEMMODE_32BIT, pCallEntry->idxInstr)
4163
4164#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4165 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4166 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4167
4168/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4169 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4170 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4171DECL_INLINE_THROW(uint32_t)
4172iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4173 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4174{
4175 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4176
4177 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4178 off = iemNativeRegFlushPendingWrites(pReNative, off);
4179
4180 /* Allocate a temporary PC register. */
4181 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4182
4183 /* Perform the addition. */
4184 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4185
4186 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4187 if (enmEffOpSize == IEMMODE_16BIT)
4188 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4189
4190 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4191 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4192
4193 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4194
4195 /* Free but don't flush the PC register. */
4196 iemNativeRegFreeTmp(pReNative, idxPcReg);
4197
4198 return off;
4199}
4200
4201
4202#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4203 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4204
4205#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4206 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4207 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4208
4209#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4210 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4211
4212#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4213 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4214 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4215
4216#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4217 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4218
4219#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4220 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4221 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4222
4223/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4224DECL_INLINE_THROW(uint32_t)
4225iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4226 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4227{
4228 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4229 off = iemNativeRegFlushPendingWrites(pReNative, off);
4230
4231 /* Allocate a temporary PC register. */
4232 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4233
4234 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4235 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4236 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4237 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4238 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4239
4240 /* Free but don't flush the PC register. */
4241 iemNativeRegFreeTmp(pReNative, idxPcReg);
4242
4243 return off;
4244}
4245
4246
4247
4248/*********************************************************************************************************************************
4249* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4250*********************************************************************************************************************************/
4251
4252/**
4253 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4254 *
4255 * @returns Pointer to the condition stack entry.
4256 * @throws  VERR_IEM_COND_TOO_DEEPLY_NESTED if there are too many nestings.
4257 */
4258DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4259{
4260 uint32_t const idxStack = pReNative->cCondDepth;
4261 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4262
4263 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4264 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4265
4266 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4267 pEntry->fInElse = false;
4268 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4269 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4270
4271 return pEntry;
4272}
4273
4274
4275/**
4276 * Start of the if-block, snapshotting the register and variable state.
4277 */
4278DECL_INLINE_THROW(void)
4279iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4280{
4281 Assert(offIfBlock != UINT32_MAX);
4282 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4283 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4284 Assert(!pEntry->fInElse);
4285
4286 /* Define the start of the IF block if requested or for disassembly purposes. */
4287 if (idxLabelIf != UINT32_MAX)
4288 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4289#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4290 else
4291 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4292#else
4293 RT_NOREF(offIfBlock);
4294#endif
4295
4296 /* Copy the initial state so we can restore it in the 'else' block. */
4297 pEntry->InitialState = pReNative->Core;
4298}
4299
4300
4301#define IEM_MC_ELSE() } while (0); \
4302 off = iemNativeEmitElse(pReNative, off); \
4303 do {
4304
4305/** Emits code related to IEM_MC_ELSE. */
4306DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4307{
4308 /* Check sanity and get the conditional stack entry. */
4309 Assert(off != UINT32_MAX);
4310 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4311 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4312 Assert(!pEntry->fInElse);
4313
4314 /* Jump to the endif */
4315 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4316
4317 /* Define the else label and enter the else part of the condition. */
4318 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4319 pEntry->fInElse = true;
4320
4321 /* Snapshot the core state so we can do a merge at the endif and restore
4322 the snapshot we took at the start of the if-block. */
4323 pEntry->IfFinalState = pReNative->Core;
4324 pReNative->Core = pEntry->InitialState;
4325
4326 return off;
4327}
4328
4329
4330#define IEM_MC_ENDIF() } while (0); \
4331 off = iemNativeEmitEndIf(pReNative, off)
4332
4333/** Emits code related to IEM_MC_ENDIF. */
4334DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4335{
4336 /* Check sanity and get the conditional stack entry. */
4337 Assert(off != UINT32_MAX);
4338 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4339 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4340
4341 /*
4342 * Now we have to find common ground between the current core state and the
4343 * one at the end of the other branch. Use the smallest common denominator
4344 * and just drop anything that isn't the same in both states.
4345 */
4346 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4347 * which is why we're doing this at the end of the else-block.
4348 * But we'd need more info about the future for that to be worth the effort. */
4349 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4350 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4351 {
4352 /* shadow guest stuff first. */
4353 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4354 if (fGstRegs)
4355 {
4356 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4357 do
4358 {
4359 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4360 fGstRegs &= ~RT_BIT_64(idxGstReg);
4361
4362 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4363 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4364 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4365 {
4366 Log12(("iemNativeEmitEndIf: dropping gst %#RX64 from hst %s\n",
4367 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4368 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4369 }
4370 } while (fGstRegs);
4371 }
4372 else
4373 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4374
4375 /* Check variables next. For now we must require them to be identical
4376 or stuff we can recreate. */
4377 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4378 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4379 if (fVars)
4380 {
4381 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4382 do
4383 {
4384 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4385 fVars &= ~RT_BIT_32(idxVar);
4386
4387 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4388 {
4389 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4390 continue;
4391 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4392 {
4393 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4394 if (idxHstReg != UINT8_MAX)
4395 {
4396 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4397 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4398 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4399 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4400 }
4401 continue;
4402 }
4403 }
4404 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4405 continue;
4406
4407 /* Irreconcilable, so drop it. */
4408 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4409 if (idxHstReg != UINT8_MAX)
4410 {
4411 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4412 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4413 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4414 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4415 }
4416 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4417 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4418 } while (fVars);
4419 }
4420
4421 /* Finally, check that the host register allocations matches. */
4422 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4423 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4424 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4425 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4426 }
4427
4428 /*
4429 * Define the endif label and maybe the else one if we're still in the 'if' part.
4430 */
4431 if (!pEntry->fInElse)
4432 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4433 else
4434 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4435 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4436
4437 /* Pop the conditional stack. */
4438 pReNative->cCondDepth -= 1;
4439
4440 return off;
4441}
4442
4443
4444#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4445 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4446 do {
4447
4448/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4449DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4450{
4451 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4452
4453 /* Get the eflags. */
4454 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4455 kIemNativeGstRegUse_ReadOnly);
4456
4457 /* Test and jump. */
4458 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4459
4460 /* Free but don't flush the EFlags register. */
4461 iemNativeRegFreeTmp(pReNative, idxEflReg);
4462
4463 /* Make a copy of the core state now as we start the if-block. */
4464 iemNativeCondStartIfBlock(pReNative, off);
4465
4466 return off;
4467}
4468
4469
4470#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4471 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4472 do {
4473
4474/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4475DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4476{
4477 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4478
4479 /* Get the eflags. */
4480 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4481 kIemNativeGstRegUse_ReadOnly);
4482
4483 /* Test and jump. */
4484 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4485
4486 /* Free but don't flush the EFlags register. */
4487 iemNativeRegFreeTmp(pReNative, idxEflReg);
4488
4489 /* Make a copy of the core state now as we start the if-block. */
4490 iemNativeCondStartIfBlock(pReNative, off);
4491
4492 return off;
4493}
4494
4495
4496#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4497 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4498 do {
4499
4500/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4501DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4502{
4503 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4504
4505 /* Get the eflags. */
4506 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4507 kIemNativeGstRegUse_ReadOnly);
4508
4509 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4510 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4511
4512 /* Test and jump. */
4513 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4514
4515 /* Free but don't flush the EFlags register. */
4516 iemNativeRegFreeTmp(pReNative, idxEflReg);
4517
4518 /* Make a copy of the core state now as we start the if-block. */
4519 iemNativeCondStartIfBlock(pReNative, off);
4520
4521 return off;
4522}
4523
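/*
 * Illustrative sketch (not part of the build): what a conditional in a
 * recompiled MC block boils down to once the IEM_MC_IF_XXX, IEM_MC_ELSE and
 * IEM_MC_ENDIF macros above have been expanded.  The MC statements inside
 * the blocks are placeholders.
 *
 * @code
 *      // IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF)
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, (X86_EFL_ZF)); do {
 *          // ... if-block MC statements ...
 *      // IEM_MC_ELSE()
 *      } while (0); off = iemNativeEmitElse(pReNative, off); do {
 *          // ... else-block MC statements ...
 *      // IEM_MC_ENDIF()
 *      } while (0); off = iemNativeEmitEndIf(pReNative, off);
 * @endcode
 */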
4524
4525#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4526 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4527 do {
4528
4529/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4530DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4531{
4532 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4533
4534 /* Get the eflags. */
4535 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4536 kIemNativeGstRegUse_ReadOnly);
4537
4538 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4539 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4540
4541 /* Test and jump. */
4542 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4543
4544 /* Free but don't flush the EFlags register. */
4545 iemNativeRegFreeTmp(pReNative, idxEflReg);
4546
4547 /* Make a copy of the core state now as we start the if-block. */
4548 iemNativeCondStartIfBlock(pReNative, off);
4549
4550 return off;
4551}
4552
4553
4554#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4555 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4556 do {
4557
4558#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4559 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4560 do {
4561
4562/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4563DECL_INLINE_THROW(uint32_t)
4564iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4565 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4566{
4567 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4568
4569 /* Get the eflags. */
4570 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4571 kIemNativeGstRegUse_ReadOnly);
4572
4573 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4574 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4575
4576 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4577 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4578 Assert(iBitNo1 != iBitNo2);
4579
4580#ifdef RT_ARCH_AMD64
4581 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4582
4583 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4584 if (iBitNo1 > iBitNo2)
4585 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4586 else
4587 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4588 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4589
4590#elif defined(RT_ARCH_ARM64)
4591 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4592 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4593
4594 /* and tmpreg, eflreg, #1<<iBitNo1 */
4595 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4596
4597 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4598 if (iBitNo1 > iBitNo2)
4599 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4600 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4601 else
4602 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4603 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4604
4605 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4606
4607#else
4608# error "Port me"
4609#endif
4610
4611 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4612 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4613 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4614
4615 /* Free but don't flush the EFlags and tmp registers. */
4616 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4617 iemNativeRegFreeTmp(pReNative, idxEflReg);
4618
4619 /* Make a copy of the core state now as we start the if-block. */
4620 iemNativeCondStartIfBlock(pReNative, off);
4621
4622 return off;
4623}
4624
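/*
 * Worked example (illustrative only): IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF)
 * with iBitNo1 = 7 (SF) and iBitNo2 = 11 (OF).  The emitted sequence is
 * effectively:
 *
 * @code
 *      tmp   = efl & RT_BIT_32(7);     // isolate SF
 *      tmp <<= 11 - 7;                 // move SF into the OF position
 *      tmp  ^= efl;                    // bit 11 of tmp is now SF ^ OF
 *      // BITS_EQ: jump to the else label if bit 11 is set;
 *      // BITS_NE (fInverted): jump to the else label if bit 11 is clear.
 * @endcode
 */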
4625
4626#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4627 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4628 do {
4629
4630#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4631 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4632 do {
4633
4634/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4635 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4636DECL_INLINE_THROW(uint32_t)
4637iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4638 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4639{
4640 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4641
4642 /* We need an if-block label for the non-inverted variant. */
4643 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4644 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4645
4646 /* Get the eflags. */
4647 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4648 kIemNativeGstRegUse_ReadOnly);
4649
4650 /* Translate the flag masks to bit numbers. */
4651 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4652 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4653
4654 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4655 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4656 Assert(iBitNo1 != iBitNo);
4657
4658 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4659 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4660 Assert(iBitNo2 != iBitNo);
4661 Assert(iBitNo2 != iBitNo1);
4662
4663#ifdef RT_ARCH_AMD64
4664 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4665#elif defined(RT_ARCH_ARM64)
4666 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4667#endif
4668
4669 /* Check for the lone bit first. */
4670 if (!fInverted)
4671 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4672 else
4673 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4674
4675 /* Then extract and compare the other two bits. */
4676#ifdef RT_ARCH_AMD64
4677 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4678 if (iBitNo1 > iBitNo2)
4679 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4680 else
4681 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4682 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4683
4684#elif defined(RT_ARCH_ARM64)
4685 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4686
4687 /* and tmpreg, eflreg, #1<<iBitNo1 */
4688 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4689
4690 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4691 if (iBitNo1 > iBitNo2)
4692 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4693 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4694 else
4695 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4696 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4697
4698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4699
4700#else
4701# error "Port me"
4702#endif
4703
4704 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4705 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4706 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4707
4708 /* Free but don't flush the EFlags and tmp registers. */
4709 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4710 iemNativeRegFreeTmp(pReNative, idxEflReg);
4711
4712 /* Make a copy of the core state now as we start the if-block. */
4713 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4714
4715 return off;
4716}
4717
4718
4719#define IEM_MC_IF_CX_IS_NZ() \
4720 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4721 do {
4722
4723/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4724DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4725{
4726 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4727
4728 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4729 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4730 kIemNativeGstRegUse_ReadOnly);
4731 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4732 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4733
4734 iemNativeCondStartIfBlock(pReNative, off);
4735 return off;
4736}
4737
4738
4739#define IEM_MC_IF_ECX_IS_NZ() \
4740 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4741 do {
4742
4743#define IEM_MC_IF_RCX_IS_NZ() \
4744 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4745 do {
4746
4747/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4748DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4749{
4750 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4751
4752 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4753 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4754 kIemNativeGstRegUse_ReadOnly);
4755 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4756 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4757
4758 iemNativeCondStartIfBlock(pReNative, off);
4759 return off;
4760}
4761
4762
4763#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4764 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4765 do {
4766
4767#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4768 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4769 do {
4770
4771/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4772DECL_INLINE_THROW(uint32_t)
4773iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4774{
4775 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4776
4777 /* We have to load both RCX and EFLAGS before we can start branching,
4778 otherwise we'll end up in the else-block with an inconsistent
4779 register allocator state.
4780 Doing EFLAGS first as it's more likely to be loaded, right? */
4781 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4782 kIemNativeGstRegUse_ReadOnly);
4783 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4784 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4785 kIemNativeGstRegUse_ReadOnly);
4786
4787 /** @todo we could reduce this to a single branch instruction by spending a
4788 * temporary register and some setnz stuff. Not sure if loops are
4789 * worth it. */
4790 /* Check CX. */
4791 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4792
4793 /* Check the EFlags bit. */
4794 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4795 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4796 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4797 !fCheckIfSet /*fJmpIfSet*/);
4798
4799 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4800 iemNativeRegFreeTmp(pReNative, idxEflReg);
4801
4802 iemNativeCondStartIfBlock(pReNative, off);
4803 return off;
4804}
4805
4806
4807#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4808 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
4809 do {
4810
4811#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4812 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
4813 do {
4814
4815#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4816 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
4817 do {
4818
4819#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4820 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
4821 do {
4822
4823/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
4824 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
4825 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
4826 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4827DECL_INLINE_THROW(uint32_t)
4828iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4829 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
4830{
4831 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4832
4833 /* We have to load both RCX and EFLAGS before we can start branching,
4834 otherwise we'll end up in the else-block with an inconsistent
4835 register allocator state.
4836 Doing EFLAGS first as it's more likely to be loaded, right? */
4837 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4838 kIemNativeGstRegUse_ReadOnly);
4839 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4840 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4841 kIemNativeGstRegUse_ReadOnly);
4842
4843 /** @todo we could reduce this to a single branch instruction by spending a
4844 * temporary register and some setnz stuff. Not sure if loops are
4845 * worth it. */
4846 /* Check RCX/ECX. */
4847 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4848
4849 /* Check the EFlags bit. */
4850 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4851 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4852 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4853 !fCheckIfSet /*fJmpIfSet*/);
4854
4855 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4856 iemNativeRegFreeTmp(pReNative, idxEflReg);
4857
4858 iemNativeCondStartIfBlock(pReNative, off);
4859 return off;
4860}
4861
4862
4863
4864/*********************************************************************************************************************************
4865* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
4866*********************************************************************************************************************************/
4867/** Number of hidden arguments for CIMPL calls.
4868 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
4869#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4870# define IEM_CIMPL_HIDDEN_ARGS 3
4871#else
4872# define IEM_CIMPL_HIDDEN_ARGS 2
4873#endif
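
/*
 * Illustrative sketch (the exact C-impl prototypes are defined by the
 * IEM_CIMPL_DEF_XXX macros elsewhere; treat the details here as assumptions):
 * the hidden arguments are the pVCpu pointer and the instruction length, so a
 * two argument C-impl call effectively ends up as
 *
 * @code
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, uArg0, uArg1);
 * @endcode
 *
 * On Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED the VBOXSTRICTRC return
 * value is also passed back via a hidden by-reference parameter, which is why
 * the count is three there.
 */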
4874
4875#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
4876 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
4877
4878#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
4879 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
4880
4881#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
4882 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
4883
4884#define IEM_MC_LOCAL(a_Type, a_Name) \
4885 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
4886
4887#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
4888 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
4889
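/*
 * Usage sketch (hypothetical MC fragment, for illustration only): in a
 * recompiled MC block the macros above turn each declaration into a variable
 * index rather than a real C variable, e.g.
 *
 * @code
 *      IEM_MC_ARG(uint16_t, u16Value, 1);
 *      //  -> uint8_t const u16Value = iemNativeArgAlloc(pReNative, 1, sizeof(uint16_t));
 *      IEM_MC_LOCAL(uint64_t, u64Tmp);
 *      //  -> uint8_t const u64Tmp   = iemNativeVarAlloc(pReNative, sizeof(uint64_t));
 * @endcode
 */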
4890
4891/**
4892 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
4893 */
4894DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
4895{
4896 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
4897 return IEM_CIMPL_HIDDEN_ARGS;
4898 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
4899 return 1;
4900 return 0;
4901}
4902
4903
4904/**
4905 * Internal work that allocates a variable with kind set to
4906 * kIemNativeVarKind_Invalid and no current stack allocation.
4907 *
4908 * The kind will either be set by the caller or later when the variable is first
4909 * assigned a value.
4910 */
4911static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
4912{
4913 Assert(cbType > 0 && cbType <= 64);
4914 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
4915 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
4916 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
4917 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
4918 pReNative->Core.aVars[idxVar].cbVar = cbType;
4919 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
4920 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4921 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
4922 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
4923 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
4924 pReNative->Core.aVars[idxVar].u.uValue = 0;
4925 return idxVar;
4926}
4927
4928
4929/**
4930 * Internal work that allocates an argument variable w/o setting enmKind.
4931 */
4932static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
4933{
4934 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
4935 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
4936 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
4937
4938 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
4939 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
4940 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
4941 return idxVar;
4942}
4943
4944
4945/**
4946 * Changes the variable to a stack variable.
4947 *
4948 * Currently this is only possible to do the first time the variable is used;
4949 * switching later could be implemented but hasn't been done.
4950 *
4951 * @param pReNative The recompiler state.
4952 * @param idxVar The variable.
4953 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
4954 */
4955static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4956{
4957 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
4958 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4959 {
4960 /* We could in theory transition from immediate to stack as well, but it
4961 would involve the caller doing work storing the value on the stack. So,
4962 till that's required we only allow transition from invalid. */
4963 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
4964 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
4965 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
4966
4967 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
4968 {
4969 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
4970 {
4971 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
4972 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4973 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
4974 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
4975 return;
4976 }
4977 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;*/
4978 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
4979 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
4980 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
4981 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
4982 uint32_t bmStack = ~pReNative->Core.bmStack;
4983 while (bmStack != UINT32_MAX)
4984 {
4985 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
4986 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4987 if (!(iSlot & fBitAlignMask))
4988 {
4989 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
4990 {
4991 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
4992 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
4993 return;
4994 }
4995 }
4996 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
4997 }
4998 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4999 }
5000 }
5001}
5002
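/*
 * Worked example (illustrative): for a hypothetical 32 byte variable the masks
 * in the multi-slot path above come out as
 *
 * @code
 *      cbVar         = 32
 *      fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1  =  3
 *      fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1             = 0xf
 * @endcode
 *
 * i.e. four consecutive 8 byte stack slots are claimed and the first slot
 * index must be a multiple of four.
 */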
5003
5004/**
5005 * Changes it to a variable with a constant value.
5006 *
5007 * This does not require stack storage as we know the value and can always
5008 * reload it, unless of course it's referenced.
5009 *
5010 * @param pReNative The recompiler state.
5011 * @param idxVar The variable.
5012 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5013 */
5014static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5015{
5016 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5017 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5018 {
5019 /* Only simple transitions for now. */
5020 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5021 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5022 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5023 }
5024 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5025}
5026
5027
5028/**
5029 * Changes the variable to a reference (pointer) to @a idxOtherVar.
5030 *
5031 * @param pReNative The recompiler state.
5032 * @param idxVar The variable.
5033 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5034 */
5035static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5036{
5037 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5038 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5039
5040 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5041 {
5042 /* Only simple transitions for now. */
5043 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5044 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5045 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5046 }
5047 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5048
5049 /* Update the other variable, ensure it's a stack variable. */
5050 /** @todo handle variables with const values... that will go boom now. */
5051 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5052 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5053}
5054
5055
5056DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5057{
5058 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5059}
5060
5061
5062DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5063{
5064 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5065 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5066 return idxVar;
5067}
5068
5069
5070DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5071{
5072 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5073 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5074 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5076
5077 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5078 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5079 return idxArgVar;
5080}
5081
5082
5083DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5084{
5085 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5086 iemNativeVarSetKindToStack(pReNative, idxVar);
5087 return idxVar;
5088}
5089
5090
5091DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5092{
5093 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5094 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5095 return idxVar;
5096}
5097
5098
5099/**
5100 * Makes sure variable @a idxVar has a register assigned to it.
5101 *
5102 * @returns The host register number.
5103 * @param pReNative The recompiler state.
5104 * @param idxVar The variable.
5105 * @param poff Pointer to the instruction buffer offset.
5106 * In case a register needs to be freed up.
5107 */
5108DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5109{
5110 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5111
5112 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5113 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5114 return idxReg;
5115
5116 /*
5117 * We have to allocate a register for the variable, even if it's a stack one,
5118 * as we don't know whether there are modifications being made to it before
5119 * it's finalized (todo: analyze and insert hints about that?).
5120 *
5121 * If we can, we try to get the correct register for argument variables. This
5122 * is assuming that most argument variables are fetched as close as possible
5123 * to the actual call, so that there aren't any interfering hidden calls
5124 * (memory accesses, etc) in between.
5125 *
5126 * If we cannot, or it's a local variable, we make sure no argument registers
5127 * that will be used by this MC block are allocated here, and we always
5128 * prefer non-volatile registers to avoid having to spill stuff for internal
5129 * calls.
5130 */
5131 /** @todo Detect too early argument value fetches and warn about hidden
5132 * calls causing less optimal code to be generated in the python script. */
5133
5134 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5135 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5136 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5137 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5138 else
5139 {
5140 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5141 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5142 & ~pReNative->Core.bmHstRegsWithGstShadow
5143 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5144 & fNotArgsMask;
5145 if (fRegs)
5146 {
5147 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
5148 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5149 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5150 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5151 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5152 }
5153 else
5154 {
5155 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5156 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5157 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5158 }
5159 }
5160 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5161 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5162 return idxReg;
5163}
5164
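/*
 * Usage sketch (illustrative; the variable and register names are assumptions):
 * the IEM_MC_XXX emitters further down typically call this to get a host
 * register for a variable before emitting code that uses it, e.g.
 *
 * @code
 *      uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxSomeOtherReg);
 * @endcode
 */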
5165
5166
5167/*********************************************************************************************************************************
5168* Emitters for IEM_MC_CALL_CIMPL_XXX *
5169*********************************************************************************************************************************/
5170
5171/**
5172 * Emits code to load a reference to the given guest register into @a idxGprDst.
5173 */
5174DECL_INLINE_THROW(uint32_t)
5175iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5176 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5177{
5178 /*
5179 * Get the offset relative to the CPUMCTX structure.
5180 */
5181 uint32_t offCpumCtx;
5182 switch (enmClass)
5183 {
5184 case kIemNativeGstRegRef_Gpr:
5185 Assert(idxRegInClass < 16);
5186 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5187 break;
5188
5189 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
5190 Assert(idxRegInClass < 4);
5191 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5192 break;
5193
5194 case kIemNativeGstRegRef_EFlags:
5195 Assert(idxRegInClass == 0);
5196 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5197 break;
5198
5199 case kIemNativeGstRegRef_MxCsr:
5200 Assert(idxRegInClass == 0);
5201 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5202 break;
5203
5204 case kIemNativeGstRegRef_FpuReg:
5205 Assert(idxRegInClass < 8);
5206 AssertFailed(); /** @todo what kind of indexing? */
5207 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5208 break;
5209
5210 case kIemNativeGstRegRef_MReg:
5211 Assert(idxRegInClass < 8);
5212 AssertFailed(); /** @todo what kind of indexing? */
5213 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5214 break;
5215
5216 case kIemNativeGstRegRef_XReg:
5217 Assert(idxRegInClass < 16);
5218 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5219 break;
5220
5221 default:
5222 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5223 }
5224
5225 /*
5226 * Load the value into the destination register.
5227 */
5228#ifdef RT_ARCH_AMD64
5229 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5230
5231#elif defined(RT_ARCH_ARM64)
5232 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5233 Assert(offCpumCtx < 4096);
5234 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5235
5236#else
5237# error "Port me!"
5238#endif
5239
5240 return off;
5241}
5242
5243
5244/**
5245 * Common code for CIMPL and AIMPL calls.
5246 *
5247 * These are calls that use argument variables and such. They should not be
5248 * confused with internal calls required to implement an MC operation,
5249 * like a TLB load or similar.
5250 *
5251 * Upon return all that is left to do is to load any hidden arguments and
5252 * perform the call. All argument variables are freed.
5253 *
5254 * @returns New code buffer offset; throws VBox status code on error.
5255 * @param pReNative The native recompile state.
5256 * @param off The code buffer offset.
5257 * @param cArgs The total number of arguments (includes hidden
5258 * count).
5259 * @param cHiddenArgs The number of hidden arguments. The hidden
5260 * arguments must not have any variable declared for
5261 * them, whereas all the regular arguments must
5262 * (tstIEMCheckMc ensures this).
5263 */
5264DECL_HIDDEN_THROW(uint32_t)
5265iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5266{
5267#ifdef VBOX_STRICT
5268 /*
5269 * Assert sanity.
5270 */
5271 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5272 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5273 for (unsigned i = 0; i < cHiddenArgs; i++)
5274 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5275 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5276 {
5277 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5278 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5279 }
5280#endif
5281
5282 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
5283
5284 /*
5285 * First, go over the host registers that will be used for arguments and make
5286 * sure they either hold the desired argument or are free.
5287 */
5288 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
5289 for (uint32_t i = 0; i < cRegArgs; i++)
5290 {
5291 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5292 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5293 {
5294 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
5295 {
5296 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
5297 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5298 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
5299 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5300 if (uArgNo == i)
5301 { /* perfect */ }
5302 else
5303 {
5304 /* The variable allocator logic should make sure this is impossible. */
5305 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
5306
5307 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5308 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5309 else
5310 {
5311 /* just free it, can be reloaded if used again */
5312 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5313 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
5314 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
5315 }
5316 }
5317 }
5318 else
5319 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
5320 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
5321 }
5322 }
5323
5324 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
5325
5326 /*
5327 * Make sure the argument variables are loaded into their respective registers.
5328 *
5329 * We can optimize this by ASSUMING that any register allocations are for
5330 * registers that have already been loaded and are ready. The previous step
5331 * saw to that.
5332 */
5333 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
5334 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5335 {
5336 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5337 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5338 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
5339 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
5340 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
5341 else
5342 {
5343 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5344 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5345 {
5346 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
5348 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
5349 | RT_BIT_32(idxArgReg);
5350 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
5351 }
5352 else
5353 {
5354 /* Use ARG0 as temp for stuff we need registers for. */
5355 switch (pReNative->Core.aVars[idxVar].enmKind)
5356 {
5357 case kIemNativeVarKind_Stack:
5358 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5360 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg,
5361 IEMNATIVE_FP_OFF_STACK_VARS
5362 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5363 continue;
5364
5365 case kIemNativeVarKind_Immediate:
5366 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
5367 continue;
5368
5369 case kIemNativeVarKind_VarRef:
5370 {
5371 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5372 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5373 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5374 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5375 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
5376 IEMNATIVE_FP_OFF_STACK_VARS
5377 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5378 continue;
5379 }
5380
5381 case kIemNativeVarKind_GstRegRef:
5382 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
5383 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5384 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5385 continue;
5386
5387 case kIemNativeVarKind_Invalid:
5388 case kIemNativeVarKind_End:
5389 break;
5390 }
5391 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5392 }
5393 }
5394 }
5395#ifdef VBOX_STRICT
5396 else
5397 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5398 {
5399 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
5400 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
5401 }
5402#endif
5403
5404#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
5405 /*
5406 * If there are any stack arguments, make sure they are in their place as well.
5407 *
5408 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since the
5409 * caller will load it later and it must be free (see the first loop).
5410 */
5411 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
5412 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
5413 {
5414 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5415 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
5416 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5417 {
5418 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
5419 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
5420 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
5421 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5422 }
5423 else
5424 {
5425 /* Use ARG0 as temp for stuff we need registers for. */
5426 switch (pReNative->Core.aVars[idxVar].enmKind)
5427 {
5428 case kIemNativeVarKind_Stack:
5429 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5430 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5431 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
5432 IEMNATIVE_FP_OFF_STACK_VARS
5433 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5434 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5435 continue;
5436
5437 case kIemNativeVarKind_Immediate:
5438 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
5439 continue;
5440
5441 case kIemNativeVarKind_VarRef:
5442 {
5443 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5444 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5445 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5446 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5447 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5448 IEMNATIVE_FP_OFF_STACK_VARS
5449 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5450 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5451 continue;
5452 }
5453
5454 case kIemNativeVarKind_GstRegRef:
5455 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5456 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5457 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5458 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5459 continue;
5460
5461 case kIemNativeVarKind_Invalid:
5462 case kIemNativeVarKind_End:
5463 break;
5464 }
5465 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5466 }
5467 }
5468#else
5469 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
5470#endif
5471
5472 /*
5473 * Free all argument variables (simplified).
5474 * Their lifetime always expires with the call they are for.
5475 */
5476 /** @todo Make the python script check that arguments aren't used after
5477 * IEM_MC_CALL_XXXX. */
5478 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
5479 {
5480 uint8_t idxVar = pReNative->Core.aidxArgVars[i];
5481 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5482 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
5483 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5484 }
5485 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5486
5487 /*
5488 * Flush volatile registers as we make the call.
5489 */
5490 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
5491
5492 return off;
5493}
5494
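/*
 * Caller-side sketch (illustrative; pfnWorker is a hypothetical worker
 * function): after iemNativeEmitCallCommon only the hidden arguments and the
 * call itself remain to be emitted, e.g.
 *
 * @code
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs + cHiddenArgs, cHiddenArgs);
 *      // load the hidden argument(s), e.g. pVCpu for a C-impl style call:
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnWorker);
 * @endcode
 *
 * iemNativeEmitCallCImplCommon below is a real user of this pattern.
 */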
5495
5496/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
5497DECL_HIDDEN_THROW(uint32_t)
5498iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5499 uintptr_t pfnCImpl, uint8_t cArgs)
5500
5501{
5502 /*
5503 * Do all the call setup and cleanup.
5504 */
5505 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
5506
5507 /*
5508 * Load the hidden arguments.
5509 */
5510#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5511 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5512 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5513 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
5514#else
5515 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5516 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
5517#endif
5518
5519 /*
5520 * Make the call and check the return code.
5521 */
5522 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
5523#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5524 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5525#endif
5526 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5527}
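/*
 * Illustrative sketch only (not emitted code): with the hidden arguments
 * loaded above, the generated sequence behaves roughly like
 * @code
 *     VBOXSTRICTRC rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 * @endcode
 * with rcStrict then routed through iemNativeEmitCheckCallRetAndPassUp(). On
 * Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED the status is instead
 * returned via the shadow stack slot and reloaded after the call, as done above.
 */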
5528
5529
5530#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
5531 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0)
5532
5533/** Emits code for IEM_MC_CALL_CIMPL_1. */
5534DECL_INLINE_THROW(uint32_t)
5535iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5536 uintptr_t pfnCImpl, uint8_t idxArg0)
5537{
5538 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5539 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5540 RT_NOREF_PV(idxArg0);
5541
5542 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 1);
5543}
5544
5545
5546#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
5547 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1)
5548
5549/** Emits code for IEM_MC_CALL_CIMPL_2. */
5550DECL_INLINE_THROW(uint32_t)
5551iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5552 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
5553{
5554 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5555 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5556 RT_NOREF_PV(idxArg0);
5557
5558 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5559 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5560 RT_NOREF_PV(idxArg1);
5561
5562 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 2);
5563}
5564
5565
5566#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
5567 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2)
5568
5569/** Emits code for IEM_MC_CALL_CIMPL_3. */
5570DECL_INLINE_THROW(uint32_t)
5571iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5572 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
5573{
5575 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5576 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5577 RT_NOREF_PV(idxArg0);
5578
5579 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5580 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5581 RT_NOREF_PV(idxArg1);
5582
5583 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5584 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5585 RT_NOREF_PV(idxArg2);
5586
5587 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 3);
5588}
5589
5590
5591#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2, a3) \
5592 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
5593
5594/** Emits code for IEM_MC_CALL_CIMPL_4. */
5595DECL_INLINE_THROW(uint32_t)
5596iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5597 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
5598{
5600 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5601 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5602 RT_NOREF_PV(idxArg0);
5603
5604 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5605 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5606 RT_NOREF_PV(idxArg1);
5607
5608 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5609 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5610 RT_NOREF_PV(idxArg2);
5611
5612 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5613 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5614 RT_NOREF_PV(idxArg3);
5615
5616 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 4);
5617}
5618
5619
5620#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2, a3, a4) \
5621 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
5622
5623/** Emits code for IEM_MC_CALL_CIMPL_5. */
5624DECL_INLINE_THROW(uint32_t)
5625iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5626 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
5627{
5629 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5630 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5631 RT_NOREF_PV(idxArg0);
5632
5633 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5634 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5635 RT_NOREF_PV(idxArg1);
5636
5637 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5638 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5639 RT_NOREF_PV(idxArg2);
5640
5641 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5642 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5643 RT_NOREF_PV(idxArg3);
5644
5645 Assert(idxArg4 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg4)));
5646 Assert(pReNative->Core.aVars[idxArg4].uArgNo == 4 + IEM_CIMPL_HIDDEN_ARGS);
5647 RT_NOREF_PV(idxArg4);
5648
5649 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 5);
5650}
5651
5652
5653
5654/*********************************************************************************************************************************
5655* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
5656*********************************************************************************************************************************/
5657
5658#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
5659 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
5660
5661/** Emits code for IEM_MC_FETCH_GREG_U16. */
5662DECL_INLINE_THROW(uint32_t)
5663iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
5664{
5665 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
5666 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
5667
5668 /*
5669 * We can either just load the low 16-bit of the GPR into a host register
5670 * for the variable, or we can do so via a shadow copy host register. The
5671 * latter will avoid having to reload it if it's being stored later, but
5672 * will waste a host register if it isn't touched again. Since we don't
5673 * know what's going to happen, we choose the latter for now.
5674 */
5675 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5676 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5677 kIemNativeGstRegUse_ReadOnly);
5678
5679 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5680 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
5681 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
5682
5683 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
5684 return off;
5685}
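/*
 * Rough C equivalent of the emitted code (sketch; 'uGstGpr' is shorthand for
 * the 64-bit guest register value, not an actual variable in this file):
 * @code
 *     uint16_t const u16Dst = (uint16_t)uGstGpr;   // only bits 15:0 are copied
 * @endcode
 */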
5686
5687
5688
5689/*********************************************************************************************************************************
5690* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
5691*********************************************************************************************************************************/
5692
5693#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
5694 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
5695
5696/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
5697DECL_INLINE_THROW(uint32_t)
5698iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
5699{
5700 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5701 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + (iGRegEx & 15)),
5702 kIemNativeGstRegUse_ForUpdate);
5703#ifdef RT_ARCH_AMD64
5704 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5705
5706 /* To the lowest byte of the register: mov r8, imm8 */
5707 if (iGRegEx < 16)
5708 {
5709 if (idxGstTmpReg >= 8)
5710 pbCodeBuf[off++] = X86_OP_REX_B;
5711 else if (idxGstTmpReg >= 4)
5712 pbCodeBuf[off++] = X86_OP_REX;
5713 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5714 pbCodeBuf[off++] = u8Value;
5715 }
5716 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
5717 else if (idxGstTmpReg < 4)
5718 {
5719 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
5720 pbCodeBuf[off++] = u8Value;
5721 }
5722 else
5723 {
5724 /* ror reg64, 8 */
5725 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5726 pbCodeBuf[off++] = 0xc1;
5727 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5728 pbCodeBuf[off++] = 8;
5729
5730 /* mov reg8, imm8 */
5731 if (idxGstTmpReg >= 8)
5732 pbCodeBuf[off++] = X86_OP_REX_B;
5733 else if (idxGstTmpReg >= 4)
5734 pbCodeBuf[off++] = X86_OP_REX;
5735 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5736 pbCodeBuf[off++] = u8Value;
5737
5738 /* rol reg64, 8 */
5739 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5740 pbCodeBuf[off++] = 0xc1;
5741 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5742 pbCodeBuf[off++] = 8;
5743 }
5744
5745#elif defined(RT_ARCH_ARM64)
5746 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
5747 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5748 if (iGRegEx < 16)
5749 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
5750 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
5751 else
5752 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
5753 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
5754 iemNativeRegFreeTmp(pReNative, idxImmReg);
5755
5756#else
5757# error "Port me!"
5758#endif
5759
5760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5761
5762 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
5763
5764 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5765 return off;
5766}
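/*
 * Semantics sketch (illustration only; 'uGstGpr' stands for the 64-bit guest
 * register aGRegs[iGRegEx & 15]):
 * @code
 *     if (iGRegEx < 16)   // al..r15b: replace bits 7:0
 *         uGstGpr = (uGstGpr & ~UINT64_C(0x00ff)) | u8Value;
 *     else                // ah/ch/dh/bh: replace bits 15:8
 *         uGstGpr = (uGstGpr & ~UINT64_C(0xff00)) | ((uint64_t)u8Value << 8);
 * @endcode
 * The AMD64 ror/mov/rol dance above is just a way of reaching bits 15:8 when
 * the host register has no addressable high-byte form.
 */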
5767
5768
5769/*
5770 * General purpose register manipulation (add, sub).
5771 */
5772
5773#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5774 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5775
5776/** Emits code for IEM_MC_SUB_GREG_U16. */
5777DECL_INLINE_THROW(uint32_t)
5778iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5779{
5780 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5781 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5782 kIemNativeGstRegUse_ForUpdate);
5783
5784#ifdef RT_ARCH_AMD64
5785 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5786 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5787 if (idxGstTmpReg >= 8)
5788 pbCodeBuf[off++] = X86_OP_REX_B;
5789 if (uSubtrahend == 1)
5790 {
5791 pbCodeBuf[off++] = 0xff; /* dec */
5792 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5793 }
5794 else
5795 {
5796 pbCodeBuf[off++] = 0x81;
5797 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5798 pbCodeBuf[off++] = uSubtrahend;
5799 pbCodeBuf[off++] = 0;
5800 }
5801
5802#else
5803 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5804 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5805
5806 /* sub tmp, gstgrp, uSubtrahend */
5807 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5808
5809 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5810 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5811
5812 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5813#endif
5814
5815 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5816
5817 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5818
5819 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5820 return off;
5821}
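/*
 * Semantics sketch (illustration only; 'uGstGpr' is the 64-bit guest register):
 * only the low 16 bits are updated, the upper bits are preserved:
 * @code
 *     uGstGpr = (uGstGpr & ~UINT64_C(0xffff)) | (uint16_t)((uint16_t)uGstGpr - uSubtrahend);
 * @endcode
 * The AMD64 path uses 'dec r16' when the subtrahend is 1 and 'sub r16, imm16'
 * otherwise; the ARM64 path subtracts into a temporary and bit-field-inserts
 * the low 16 bits back.
 */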
5822
5823
5824#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5825 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5826
5827#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5828 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5829
5830/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5831DECL_INLINE_THROW(uint32_t)
5832iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5833{
5834 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5835 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5836 kIemNativeGstRegUse_ForUpdate);
5837
5838#ifdef RT_ARCH_AMD64
5839 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5840 if (f64Bit)
5841 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5842 else if (idxGstTmpReg >= 8)
5843 pbCodeBuf[off++] = X86_OP_REX_B;
5844 if (uSubtrahend == 1)
5845 {
5846 /* dec */
5847 pbCodeBuf[off++] = 0xff;
5848 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5849 }
5850 else if (uSubtrahend < 128)
5851 {
5852 pbCodeBuf[off++] = 0x83; /* sub */
5853 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5854 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5855 }
5856 else
5857 {
5858 pbCodeBuf[off++] = 0x81; /* sub */
5859 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5860 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5861 pbCodeBuf[off++] = 0;
5862 pbCodeBuf[off++] = 0;
5863 pbCodeBuf[off++] = 0;
5864 }
5865
5866#else
5867 /* sub tmp, gstgrp, uSubtrahend */
5868 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5869 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5870
5871#endif
5872
5873 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5874
5875 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5876
5877 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5878 return off;
5879}
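/*
 * Semantics sketch (illustration only; 'uGstGpr' is the 64-bit guest register):
 * as on the real CPU, the 32-bit form zero-extends the result into the full
 * 64-bit register:
 * @code
 *     if (f64Bit)
 *         uGstGpr = uGstGpr - uSubtrahend;
 *     else
 *         uGstGpr = (uint32_t)((uint32_t)uGstGpr - uSubtrahend);  // upper half cleared
 * @endcode
 */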
5880
5881
5882
5883/*********************************************************************************************************************************
5884* Builtin functions *
5885*********************************************************************************************************************************/
5886
5887/**
5888 * Built-in function that calls a C-implementation function taking zero arguments.
5889 */
5890static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
5891{
5892 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
5893 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
5894 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
5895}
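/*
 * Sketch: the threaded call entry packs the C-impl pointer in auParams[0] and
 * the instruction length in auParams[1], so the code emitted here behaves
 * roughly like (illustration only):
 * @code
 *     return pfnCImpl(pVCpu, cbInstr);
 * @endcode
 */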
5896
5897
5898/**
5899 * Built-in function that checks for pending interrupts that can be delivered or
5900 * forced action flags.
5901 *
5902 * This triggers after the completion of an instruction, so EIP is already at
5903 * the next instruction. If an IRQ or important FF is pending, this will return
5904 * a non-zero status that stops TB execution.
5905 */
5906static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
5907{
5908 RT_NOREF(pCallEntry);
5909
5910 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
5911 and I'm too lazy to create a 'Fixed' version of that one. */
5912 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
5913 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
5914
5915 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
5916
5917 /* Again, we need to load the extended EFLAGS before we actually need them
5918 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
5919 loaded them inside the check, as the shadow state would not be correct
5920 when the code branches before the load. Ditto PC. */
5921 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5922 kIemNativeGstRegUse_ReadOnly);
5923
5924 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
5925
5926 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5927
5928 /*
5929 * Start by checking the local forced actions of the EMT we're on for IRQs
5930 * and other FFs that need servicing.
5931 */
5932 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
5933 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
5934 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
5935 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5936 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
5937 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
5938 | VMCPU_FF_TLB_FLUSH
5939 | VMCPU_FF_UNHALT ),
5940 true /*fSetFlags*/);
5941 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
5942 uint32_t const offFixupJumpToVmCheck1 = off;
5943 off = iemNativeEmitJzToFixed(pReNative, off, 0);
5944
5945 /* Some relevant FFs are set, but if it's only the APIC and/or PIC being set,
5946 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
5947 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5948 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
5949 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
5950 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
5951
5952 /* So, it's only interrupt related FFs and we need to see if IRQs are being
5953 suppressed by the CPU or not. */
5954 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
5955 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
5956 idxLabelReturnBreak);
5957
5958 /* We've got shadow flags set, so we must check that the PC they are valid
5959 for matches our current PC value. */
5960 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
5961 * a register. */
5962 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
5963 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
5964
5965 /*
5966 * Now check the force flags of the VM.
5967 */
5968 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
5969 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
5970 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
5971 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
5972 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
5973 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
5974
5975 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
5976
5977 /*
5978 * We're good, no IRQs or FFs pending.
5979 */
5980 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5981 iemNativeRegFreeTmp(pReNative, idxEflReg);
5982 iemNativeRegFreeTmp(pReNative, idxPcReg);
5983
5984 return off;
5985}
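/*
 * Rough C equivalent of the checks emitted above (sketch, simplified; the
 * real code keeps the values in host registers and branches to shared labels):
 * @code
 *     uint64_t fFFs = pVCpu->fLocalForcedActions
 *                   & (VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
 *                                            | VMCPU_FF_TLB_FLUSH | VMCPU_FF_UNHALT));
 *     if (fFFs)
 *     {
 *         if (fFFs & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
 *             return VINF_IEM_REEXEC_BREAK;
 *         if (   (fEFlags & X86_EFL_IF)
 *             && (   !(fEFlags & CPUMCTX_INHIBIT_SHADOW)
 *                 || pVCpu->cpum.GstCtx.uRipInhibitInt != uPc))
 *             return VINF_IEM_REEXEC_BREAK;
 *     }
 *     if (pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions & VM_FF_ALL_MASK)
 *         return VINF_IEM_REEXEC_BREAK;
 * @endcode
 * where 'fEFlags' and 'uPc' stand for the extended EFLAGS and RIP values held
 * in idxEflReg and idxPcReg.
 */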
5986
5987
5988/**
5989 * Built-in function checks if IEMCPU::fExec has the expected value.
5990 */
5991static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
5992{
5993 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
5994 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5995
5996 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5997 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
5998 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
5999 kIemNativeLabelType_ReturnBreak);
6000 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6001 return off;
6002}
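/*
 * Rough C equivalent (sketch):
 * @code
 *     if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
 *         return VINF_IEM_REEXEC_BREAK;
 * @endcode
 */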
6003
6004
6005
6006/*********************************************************************************************************************************
6007* The native code generator functions for each MC block. *
6008*********************************************************************************************************************************/
6009
6010
6011/*
6012 * Include g_apfnIemNativeRecompileFunctions and associated functions.
6013 *
6014 * This should probably live in its own file later, but let's see what the
6015 * compile times turn out to be first.
6016 */
6017#include "IEMNativeFunctions.cpp.h"
6018
6019
6020
6021/*********************************************************************************************************************************
6022* Recompiler Core. *
6023*********************************************************************************************************************************/
6024
6025
6026/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
6027static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
6028{
6029 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
6030 pDis->cbCachedInstr += cbMaxRead;
6031 RT_NOREF(cbMinRead);
6032 return VERR_NO_DATA;
6033}
6034
6035
6036/**
6037 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
6038 * @returns pszBuf.
6039 * @param fFlags The flags.
6040 * @param pszBuf The output buffer.
6041 * @param cbBuf The output buffer size. At least 32 bytes.
6042 */
6043DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
6044{
6045 Assert(cbBuf >= 32);
6046 static RTSTRTUPLE const s_aModes[] =
6047 {
6048 /* [00] = */ { RT_STR_TUPLE("16BIT") },
6049 /* [01] = */ { RT_STR_TUPLE("32BIT") },
6050 /* [02] = */ { RT_STR_TUPLE("!2!") },
6051 /* [03] = */ { RT_STR_TUPLE("!3!") },
6052 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
6053 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
6054 /* [06] = */ { RT_STR_TUPLE("!6!") },
6055 /* [07] = */ { RT_STR_TUPLE("!7!") },
6056 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
6057 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
6058 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
6059 /* [0b] = */ { RT_STR_TUPLE("!b!") },
6060 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
6061 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
6062 /* [0e] = */ { RT_STR_TUPLE("!e!") },
6063 /* [0f] = */ { RT_STR_TUPLE("!f!") },
6064 /* [10] = */ { RT_STR_TUPLE("!10!") },
6065 /* [11] = */ { RT_STR_TUPLE("!11!") },
6066 /* [12] = */ { RT_STR_TUPLE("!12!") },
6067 /* [13] = */ { RT_STR_TUPLE("!13!") },
6068 /* [14] = */ { RT_STR_TUPLE("!14!") },
6069 /* [15] = */ { RT_STR_TUPLE("!15!") },
6070 /* [16] = */ { RT_STR_TUPLE("!16!") },
6071 /* [17] = */ { RT_STR_TUPLE("!17!") },
6072 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
6073 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
6074 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
6075 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
6076 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
6077 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
6078 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
6079 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
6080 };
6081 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
6082 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
6083 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
6084
6085 pszBuf[off++] = ' ';
6086 pszBuf[off++] = 'C';
6087 pszBuf[off++] = 'P';
6088 pszBuf[off++] = 'L';
6089 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
6090 Assert(off < 32);
6091
6092 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
6093
6094 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
6095 {
6096 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
6097 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
6098 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
6099 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
6100 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
6101 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
6102 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
6103 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
6104 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
6105 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
6106 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
6107 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
6108 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
6109 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
6110 };
6111 if (fFlags)
6112 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
6113 if (s_aFlags[i].fFlag & fFlags)
6114 {
6115 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
6116 pszBuf[off++] = ' ';
6117 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
6118 off += s_aFlags[i].cchName;
6119 fFlags &= ~s_aFlags[i].fFlag;
6120 if (!fFlags)
6121 break;
6122 }
6123 pszBuf[off] = '\0';
6124
6125 return pszBuf;
6126}
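/*
 * Example use (sketch): format the flags of a TB for logging.
 * @code
 *     char szTmp[256];
 *     Log2(("TB %p flags: %s\n", pTb, iemTbFlagsToString(pTb->fFlags, szTmp, sizeof(szTmp))));
 * @endcode
 */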
6127
6128
6129DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
6130{
6131 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
6132
6133 char szDisBuf[512];
6134 DISSTATE Dis;
6135 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
6136 uint32_t const cNative = pTb->Native.cInstructions;
6137 uint32_t offNative = 0;
6138#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6139 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
6140#endif
6141 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6142 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6143 : DISCPUMODE_64BIT;
6144#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6145 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
6146#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6147 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
6148#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6149# error "Port me"
6150#else
6151 csh hDisasm = ~(size_t)0;
6152# if defined(RT_ARCH_AMD64)
6153 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
6154# elif defined(RT_ARCH_ARM64)
6155 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
6156# else
6157# error "Port me"
6158# endif
6159 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
6160#endif
6161
6162 /*
6163 * Print TB info.
6164 */
6165 pHlp->pfnPrintf(pHlp,
6166 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
6167 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
6168 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
6169 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
6170#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6171 if (pDbgInfo && pDbgInfo->cEntries > 1)
6172 {
6173 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
6174
6175 /*
6176 * This disassembly is driven by the debug info which follows the native
6177 * code and indicates where the next guest instruction starts, where
6178 * labels are, and other such things.
6179 */
6180 uint32_t idxThreadedCall = 0;
6181 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
6182 uint8_t idxRange = UINT8_MAX;
6183 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
6184 uint32_t offRange = 0;
6185 uint32_t offOpcodes = 0;
6186 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
6187 uint32_t const cDbgEntries = pDbgInfo->cEntries;
6188 uint32_t iDbgEntry = 1;
6189 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
6190
6191 while (offNative < cNative)
6192 {
6193 /* If we're at or have passed the point where the next chunk of debug
6194 info starts, process it. */
6195 if (offDbgNativeNext <= offNative)
6196 {
6197 offDbgNativeNext = UINT32_MAX;
6198 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
6199 {
6200 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
6201 {
6202 case kIemTbDbgEntryType_GuestInstruction:
6203 {
6204 /* Did the exec flag change? */
6205 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
6206 {
6207 pHlp->pfnPrintf(pHlp,
6208 " fExec change %#08x -> %#08x %s\n",
6209 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6210 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6211 szDisBuf, sizeof(szDisBuf)));
6212 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
6213 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6214 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6215 : DISCPUMODE_64BIT;
6216 }
6217
6218 /* New opcode range? We need to cope with a spurious debug info entry here for cases
6219 where the compilation was aborted before the opcode was recorded and the actual
6220 instruction was translated to a threaded call. This may happen when we run out
6221 of ranges, or when some complicated interrupts/FFs are found to be pending or
6222 similar. So, we just deal with it here rather than in the compiler code as it
6223 is a lot simpler to do up here. */
6224 if ( idxRange == UINT8_MAX
6225 || idxRange >= cRanges
6226 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
6227 {
6228 idxRange += 1;
6229 if (idxRange < cRanges)
6230 offRange = 0;
6231 else
6232 continue;
6233 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
6234 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
6235 + (pTb->aRanges[idxRange].idxPhysPage == 0
6236 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6237 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
6238 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6239 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
6240 pTb->aRanges[idxRange].idxPhysPage);
6241 }
6242
6243 /* Disassemble the instruction. */
6244 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
6245 uint32_t cbInstr = 1;
6246 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6247 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
6248 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6249 if (RT_SUCCESS(rc))
6250 {
6251 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6252 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6253 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6254 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6255
6256 static unsigned const s_offMarker = 55;
6257 static char const s_szMarker[] = " ; <--- guest";
6258 if (cch < s_offMarker)
6259 {
6260 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
6261 cch = s_offMarker;
6262 }
6263 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
6264 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
6265
6266 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
6267 }
6268 else
6269 {
6270 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
6271 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
6272 cbInstr = 1;
6273 }
6274 GCPhysPc += cbInstr;
6275 offOpcodes += cbInstr;
6276 offRange += cbInstr;
6277 continue;
6278 }
6279
6280 case kIemTbDbgEntryType_ThreadedCall:
6281 pHlp->pfnPrintf(pHlp,
6282 " Call #%u to %s (%u args)%s\n",
6283 idxThreadedCall,
6284 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6285 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6286 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
6287 idxThreadedCall++;
6288 continue;
6289
6290 case kIemTbDbgEntryType_GuestRegShadowing:
6291 {
6292 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
6293 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
6294 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
6295 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
6296 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6297 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
6298 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
6299 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
6300 else
6301 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
6302 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
6303 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6304 continue;
6305 }
6306
6307 case kIemTbDbgEntryType_Label:
6308 {
6309 const char *pszName = "what_the_fudge";
6310 const char *pszComment = "";
6311 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
6312 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
6313 {
6314 case kIemNativeLabelType_Return:
6315 pszName = "Return";
6316 break;
6317 case kIemNativeLabelType_ReturnBreak:
6318 pszName = "ReturnBreak";
6319 break;
6320 case kIemNativeLabelType_ReturnWithFlags:
6321 pszName = "ReturnWithFlags";
6322 break;
6323 case kIemNativeLabelType_NonZeroRetOrPassUp:
6324 pszName = "NonZeroRetOrPassUp";
6325 break;
6326 case kIemNativeLabelType_RaiseGp0:
6327 pszName = "RaiseGp0";
6328 break;
6329 case kIemNativeLabelType_If:
6330 pszName = "If";
6331 fNumbered = true;
6332 break;
6333 case kIemNativeLabelType_Else:
6334 pszName = "Else";
6335 fNumbered = true;
6336 pszComment = " ; regs state restored pre-if-block";
6337 break;
6338 case kIemNativeLabelType_Endif:
6339 pszName = "Endif";
6340 fNumbered = true;
6341 break;
6342 case kIemNativeLabelType_CheckIrq:
6343 pszName = "CheckIrq_CheckVM";
6344 fNumbered = true;
6345 break;
6346 case kIemNativeLabelType_Invalid:
6347 case kIemNativeLabelType_End:
6348 break;
6349 }
6350 if (fNumbered)
6351 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
6352 else
6353 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
6354 continue;
6355 }
6356
6357 case kIemTbDbgEntryType_NativeOffset:
6358 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
6359 Assert(offDbgNativeNext > offNative);
6360 break;
6361
6362 default:
6363 AssertFailed();
6364 }
6365 iDbgEntry++;
6366 break;
6367 }
6368 }
6369
6370 /*
6371 * Disassemble the next native instruction.
6372 */
6373 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6374# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6375 uint32_t cbInstr = sizeof(paNative[0]);
6376 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6377 if (RT_SUCCESS(rc))
6378 {
6379# if defined(RT_ARCH_AMD64)
6380 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6381 {
6382 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6383 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6384 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
6385 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6386 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6387 uInfo & 0x8000 ? " - recompiled" : "");
6388 else
6389 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6390 }
6391 else
6392# endif
6393 {
6394# ifdef RT_ARCH_AMD64
6395 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6396 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6397 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6398 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6399# elif defined(RT_ARCH_ARM64)
6400 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6401 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6402 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6403# else
6404# error "Port me"
6405# endif
6406 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6407 }
6408 }
6409 else
6410 {
6411# if defined(RT_ARCH_AMD64)
6412 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6413 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6414# elif defined(RT_ARCH_ARM64)
6415 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6416# else
6417# error "Port me"
6418# endif
6419 cbInstr = sizeof(paNative[0]);
6420 }
6421 offNative += cbInstr / sizeof(paNative[0]);
6422
6423# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6424 cs_insn *pInstr;
6425 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6426 (uintptr_t)pNativeCur, 1, &pInstr);
6427 if (cInstrs > 0)
6428 {
6429 Assert(cInstrs == 1);
6430# if defined(RT_ARCH_AMD64)
6431 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6432 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6433# else
6434 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6435 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6436# endif
6437 offNative += pInstr->size / sizeof(*pNativeCur);
6438 cs_free(pInstr, cInstrs);
6439 }
6440 else
6441 {
6442# if defined(RT_ARCH_AMD64)
6443 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6444 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6445# else
6446 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6447# endif
6448 offNative++;
6449 }
6450# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6451 }
6452 }
6453 else
6454#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
6455 {
6456 /*
6457 * No debug info, just disassemble the x86 code and then the native code.
6458 *
6459 * First the guest code:
6460 */
6461 for (unsigned i = 0; i < pTb->cRanges; i++)
6462 {
6463 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
6464 + (pTb->aRanges[i].idxPhysPage == 0
6465 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6466 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
6467 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6468 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
6469 unsigned off = pTb->aRanges[i].offOpcodes;
6470 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
6471 while (off < cbOpcodes)
6472 {
6473 uint32_t cbInstr = 1;
6474 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6475 &pTb->pabOpcodes[off], cbOpcodes - off,
6476 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6477 if (RT_SUCCESS(rc))
6478 {
6479 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6480 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6481 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6482 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6483 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
6484 GCPhysPc += cbInstr;
6485 off += cbInstr;
6486 }
6487 else
6488 {
6489 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
6490 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
6491 break;
6492 }
6493 }
6494 }
6495
6496 /*
6497 * Then the native code:
6498 */
6499 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
6500 while (offNative < cNative)
6501 {
6502 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6503# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6504 uint32_t cbInstr = sizeof(paNative[0]);
6505 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6506 if (RT_SUCCESS(rc))
6507 {
6508# if defined(RT_ARCH_AMD64)
6509 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6510 {
6511 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6512 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6513 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
6514 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6515 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6516 uInfo & 0x8000 ? " - recompiled" : "");
6517 else
6518 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6519 }
6520 else
6521# endif
6522 {
6523# ifdef RT_ARCH_AMD64
6524 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6525 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6526 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6527 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6528# elif defined(RT_ARCH_ARM64)
6529 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6530 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6531 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6532# else
6533# error "Port me"
6534# endif
6535 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6536 }
6537 }
6538 else
6539 {
6540# if defined(RT_ARCH_AMD64)
6541 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6542 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6543# else
6544 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6545# endif
6546 cbInstr = sizeof(paNative[0]);
6547 }
6548 offNative += cbInstr / sizeof(paNative[0]);
6549
6550# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6551 cs_insn *pInstr;
6552 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6553 (uintptr_t)pNativeCur, 1, &pInstr);
6554 if (cInstrs > 0)
6555 {
6556 Assert(cInstrs == 1);
6557# if defined(RT_ARCH_AMD64)
6558 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6559 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6560# else
6561 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6562 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6563# endif
6564 offNative += pInstr->size / sizeof(*pNativeCur);
6565 cs_free(pInstr, cInstrs);
6566 }
6567 else
6568 {
6569# if defined(RT_ARCH_AMD64)
6570 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6571 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6572# else
6573 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6574# endif
6575 offNative++;
6576 }
6577# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6578 }
6579 }
6580
6581#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6582 /* Cleanup. */
6583 cs_close(&hDisasm);
6584#endif
6585}
6586
6587
6588/**
6589 * Recompiles the given threaded TB into a native one.
6590 *
6591 * In case of failure the translation block will be returned as-is.
6592 *
6593 * @returns pTb.
6594 * @param pVCpu The cross context virtual CPU structure of the calling
6595 * thread.
6596 * @param pTb The threaded translation block to recompile to native.
6597 */
6598DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
6599{
6600 /*
6601 * The first time thru, we allocate the recompiler state; the other times
6602 * we just need to reset it before using it again.
6603 */
6604 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
6605 if (RT_LIKELY(pReNative))
6606 iemNativeReInit(pReNative, pTb);
6607 else
6608 {
6609 pReNative = iemNativeInit(pVCpu, pTb);
6610 AssertReturn(pReNative, pTb);
6611 }
6612
6613 /*
6614 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
6615 * for aborting if an error happens.
6616 */
6617 uint32_t cCallsLeft = pTb->Thrd.cCalls;
6618#ifdef LOG_ENABLED
6619 uint32_t const cCallsOrg = cCallsLeft;
6620#endif
6621 uint32_t off = 0;
6622 int rc = VINF_SUCCESS;
6623 IEMNATIVE_TRY_SETJMP(pReNative, rc)
6624 {
6625 /*
6626 * Emit prolog code (fixed).
6627 */
6628 off = iemNativeEmitProlog(pReNative, off);
6629
6630 /*
6631 * Convert the calls to native code.
6632 */
6633#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6634 int32_t iGstInstr = -1;
6635 uint32_t fExec = pTb->fFlags;
6636#endif
6637 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
6638 while (cCallsLeft-- > 0)
6639 {
6640 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
6641
6642 /*
6643 * Debug info and assembly markup.
6644 */
6645#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6646 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
6647 fExec = pCallEntry->auParams[0];
6648 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6649 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
6650 {
6651 if (iGstInstr < (int32_t)pTb->cInstructions)
6652 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
6653 else
6654 Assert(iGstInstr == pTb->cInstructions);
6655 iGstInstr = pCallEntry->idxInstr;
6656 }
6657 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
6658#endif
6659#if defined(VBOX_STRICT) && 1
6660 off = iemNativeEmitMarker(pReNative, off,
6661 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
6662 pCallEntry->enmFunction));
6663#endif
6664
6665 /*
6666 * Actual work.
6667 */
6668 if (pfnRecom) /** @todo stats on this. */
6669 {
6670 //STAM_COUNTER_INC()
6671 off = pfnRecom(pReNative, off, pCallEntry);
6672 }
6673 else
6674 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
6675 Assert(off <= pReNative->cInstrBufAlloc);
6676 Assert(pReNative->cCondDepth == 0);
6677
6678 /*
6679 * Advance.
6680 */
6681 pCallEntry++;
6682 }
6683
6684 /*
6685 * Emit the epilog code.
6686 */
6687 uint32_t idxReturnLabel;
6688 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
6689
6690 /*
6691 * Generate special jump labels.
6692 */
6693 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
6694 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
6695 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
6696 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
6697 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
6698 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
6699 }
6700 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
6701 {
6702 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
6703 return pTb;
6704 }
6705 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
6706 Assert(off <= pReNative->cInstrBufAlloc);
6707
6708 /*
6709 * Make sure all labels have been defined.
6710 */
6711 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
6712#ifdef VBOX_STRICT
6713 uint32_t const cLabels = pReNative->cLabels;
6714 for (uint32_t i = 0; i < cLabels; i++)
6715 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
6716#endif
6717
6718 /*
6719 * Allocate executable memory, copy over the code we've generated.
6720 */
6721 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
6722 if (pTbAllocator->pDelayedFreeHead)
6723 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
6724
6725 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
6726 AssertReturn(paFinalInstrBuf, pTb);
6727 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
6728
6729 /*
6730 * Apply fixups.
6731 */
6732 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
6733 uint32_t const cFixups = pReNative->cFixups;
6734 for (uint32_t i = 0; i < cFixups; i++)
6735 {
6736 Assert(paFixups[i].off < off);
6737 Assert(paFixups[i].idxLabel < cLabels);
6738 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
6739 switch (paFixups[i].enmType)
6740 {
6741#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6742 case kIemNativeFixupType_Rel32:
6743 Assert(paFixups[i].off + 4 <= off);
6744 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6745 continue;
6746
6747#elif defined(RT_ARCH_ARM64)
6748 case kIemNativeFixupType_RelImm26At0:
6749 {
6750 Assert(paFixups[i].off < off);
6751 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6752 Assert(offDisp >= -33554432 && offDisp < 33554432); /* +/- 2^25 instructions for imm26 */
6753 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6754 continue;
6755 }
6756
6757 case kIemNativeFixupType_RelImm19At5:
6758 {
6759 Assert(paFixups[i].off < off);
6760 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6761 Assert(offDisp >= -262144 && offDisp < 262144);
6762 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6763 continue;
6764 }
6765
6766 case kIemNativeFixupType_RelImm14At5:
6767 {
6768 Assert(paFixups[i].off < off);
6769 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6770 Assert(offDisp >= -8192 && offDisp < 8192);
6771 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
6772 continue;
6773 }
6774
6775#endif
6776 case kIemNativeFixupType_Invalid:
6777 case kIemNativeFixupType_End:
6778 break;
6779 }
6780 AssertFailed();
6781 }
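    /*
     * Worked example (illustration only): for a kIemNativeFixupType_Rel32 fixup
     * the value patched in above is paLabels[idxLabel].off - paFixups[i].off
     * + offAddend, i.e. the label position relative to the fixup position in
     * IEMNATIVEINSTR units, with the emitter-supplied addend typically
     * compensating for rel32 being relative to the end of the instruction.
     */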
6782
6783 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
6784
6785 /*
6786 * Convert the translation block.
6787 */
6788 //RT_BREAKPOINT();
6789 RTMemFree(pTb->Thrd.paCalls);
6790 pTb->Native.paInstructions = paFinalInstrBuf;
6791 pTb->Native.cInstructions = off;
6792 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
6793#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6794 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
6795 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
6796#endif
6797
6798 Assert(pTbAllocator->cThreadedTbs > 0);
6799 pTbAllocator->cThreadedTbs -= 1;
6800 pTbAllocator->cNativeTbs += 1;
6801 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
6802
6803#ifdef LOG_ENABLED
6804 /*
6805 * Disassemble to the log if enabled.
6806 */
6807 if (LogIs3Enabled())
6808 {
6809 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
6810 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
6811 }
6812#endif
6813
6814 return pTb;
6815}
6816