VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@102083

Last change on this file since 102083 was 102083, checked in by vboxsync, 15 months ago

VMM/IEM: Fixed register handling for the RC variable in IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 calls (currently untested). bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 357.6 KB
1/* $Id: IEMAllN8veRecompiler.cpp 102083 2023-11-13 13:16:17Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
 71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
 106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128
129/*********************************************************************************************************************************
130* Defined Constants And Macros *
131*********************************************************************************************************************************/
132/** Always count instructions for now. */
133#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
134
135
136/*********************************************************************************************************************************
137* Internal Functions *
138*********************************************************************************************************************************/
139#ifdef VBOX_STRICT
140static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
141 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
142static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
143#endif
144#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
145static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
146static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
147#endif
148DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
149DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
150 IEMNATIVEGSTREG enmGstReg, uint32_t off);
151
152
153/*********************************************************************************************************************************
154* Executable Memory Allocator *
155*********************************************************************************************************************************/
156/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
 157 * Use an alternative chunk sub-allocator that does not store internal data
 158 * in the chunk.
159 *
 160 * Using the RTHeapSimple is not practical on newer darwin systems where
161 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
162 * memory. We would have to change the protection of the whole chunk for
163 * every call to RTHeapSimple, which would be rather expensive.
164 *
 165 * This alternative implementation lets us restrict page protection modifications
166 * to the pages backing the executable memory we just allocated.
167 */
168#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
169/** The chunk sub-allocation unit size in bytes. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
171/** The chunk sub-allocation unit size as a shift factor. */
172#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
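/* Example (illustrative): with the 128 byte unit size above, a 300 byte request
 * needs cReqUnits = (300 + 128 - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 3
 * units, i.e. 3 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 384 bytes are reserved
 * and tracked as three consecutive bits in the per-chunk allocation bitmap. */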
173
174#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
175# ifdef IEMNATIVE_USE_GDB_JIT
176# define IEMNATIVE_USE_GDB_JIT_ET_DYN
177
178/** GDB JIT: Code entry. */
179typedef struct GDBJITCODEENTRY
180{
181 struct GDBJITCODEENTRY *pNext;
182 struct GDBJITCODEENTRY *pPrev;
183 uint8_t *pbSymFile;
184 uint64_t cbSymFile;
185} GDBJITCODEENTRY;
186
187/** GDB JIT: Actions. */
188typedef enum GDBJITACTIONS : uint32_t
189{
190 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
191} GDBJITACTIONS;
192
193/** GDB JIT: Descriptor. */
194typedef struct GDBJITDESCRIPTOR
195{
196 uint32_t uVersion;
197 GDBJITACTIONS enmAction;
198 GDBJITCODEENTRY *pRelevant;
199 GDBJITCODEENTRY *pHead;
200 /** Our addition: */
201 GDBJITCODEENTRY *pTail;
202} GDBJITDESCRIPTOR;
203
204/** GDB JIT: Our simple symbol file data. */
205typedef struct GDBJITSYMFILE
206{
207 Elf64_Ehdr EHdr;
208# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
209 Elf64_Shdr aShdrs[5];
210# else
211 Elf64_Shdr aShdrs[7];
212 Elf64_Phdr aPhdrs[2];
213# endif
214 /** The dwarf ehframe data for the chunk. */
215 uint8_t abEhFrame[512];
216 char szzStrTab[128];
217 Elf64_Sym aSymbols[3];
218# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
219 Elf64_Sym aDynSyms[2];
220 Elf64_Dyn aDyn[6];
221# endif
222} GDBJITSYMFILE;
223
224extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
225extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
226
227/** Init once for g_IemNativeGdbJitLock. */
228static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
 229/** Critical section protecting the GDB JIT descriptor list. */
230static RTCRITSECT g_IemNativeGdbJitLock;
231
232/** GDB reads the info here. */
233GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
234
235/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
236DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
237{
238 ASMNopPause();
239}
240
241/** @callback_method_impl{FNRTONCE} */
242static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
243{
244 RT_NOREF(pvUser);
245 return RTCritSectInit(&g_IemNativeGdbJitLock);
246}
247
248
249# endif /* IEMNATIVE_USE_GDB_JIT */
250
251/**
252 * Per-chunk unwind info for non-windows hosts.
253 */
254typedef struct IEMEXECMEMCHUNKEHFRAME
255{
256# ifdef IEMNATIVE_USE_LIBUNWIND
257 /** The offset of the FDA into abEhFrame. */
258 uintptr_t offFda;
259# else
260 /** 'struct object' storage area. */
261 uint8_t abObject[1024];
262# endif
263# ifdef IEMNATIVE_USE_GDB_JIT
264# if 0
265 /** The GDB JIT 'symbol file' data. */
266 GDBJITSYMFILE GdbJitSymFile;
267# endif
268 /** The GDB JIT list entry. */
269 GDBJITCODEENTRY GdbJitEntry;
270# endif
271 /** The dwarf ehframe data for the chunk. */
272 uint8_t abEhFrame[512];
273} IEMEXECMEMCHUNKEHFRAME;
274/** Pointer to per-chunk info for non-windows hosts. */
275typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
276#endif
277
278
279/**
 280 * A chunk of executable memory.
281 */
282typedef struct IEMEXECMEMCHUNK
283{
284#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
285 /** Number of free items in this chunk. */
286 uint32_t cFreeUnits;
 287 /** Hint where to start searching for free space in the allocation bitmap. */
288 uint32_t idxFreeHint;
289#else
290 /** The heap handle. */
291 RTHEAPSIMPLE hHeap;
292#endif
293 /** Pointer to the chunk. */
294 void *pvChunk;
295#ifdef IN_RING3
296 /**
297 * Pointer to the unwind information.
298 *
299 * This is used during C++ throw and longjmp (windows and probably most other
 300 * platforms). Some debuggers (windbg) make use of it as well.
301 *
302 * Windows: This is allocated from hHeap on windows because (at least for
303 * AMD64) the UNWIND_INFO structure address in the
304 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
305 *
306 * Others: Allocated from the regular heap to avoid unnecessary executable data
307 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
308 void *pvUnwindInfo;
309#elif defined(IN_RING0)
310 /** Allocation handle. */
311 RTR0MEMOBJ hMemObj;
312#endif
313} IEMEXECMEMCHUNK;
314/** Pointer to a memory chunk. */
315typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
316
317
318/**
319 * Executable memory allocator for the native recompiler.
320 */
321typedef struct IEMEXECMEMALLOCATOR
322{
323 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
324 uint32_t uMagic;
325
326 /** The chunk size. */
327 uint32_t cbChunk;
328 /** The maximum number of chunks. */
329 uint32_t cMaxChunks;
330 /** The current number of chunks. */
331 uint32_t cChunks;
332 /** Hint where to start looking for available memory. */
333 uint32_t idxChunkHint;
334 /** Statistics: Current number of allocations. */
335 uint32_t cAllocations;
336
337 /** The total amount of memory available. */
338 uint64_t cbTotal;
339 /** Total amount of free memory. */
340 uint64_t cbFree;
341 /** Total amount of memory allocated. */
342 uint64_t cbAllocated;
343
344#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
345 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
346 *
347 * Since the chunk size is a power of two and the minimum chunk size is a lot
348 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
349 * require a whole number of uint64_t elements in the allocation bitmap. So,
 350 * for the sake of simplicity/laziness, they are allocated as one
 351 * continuous chunk. */
352 uint64_t *pbmAlloc;
353 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
354 uint32_t cUnitsPerChunk;
355 /** Number of bitmap elements per chunk (for quickly locating the bitmap
 356 * portion corresponding to a chunk). */
357 uint32_t cBitmapElementsPerChunk;
358#else
 359 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
360 * @{ */
361 /** The size of the heap internal block header. This is used to adjust the
 362 * requested memory size to make sure there is exactly enough room for a header at
363 * the end of the blocks we allocate before the next 64 byte alignment line. */
364 uint32_t cbHeapBlockHdr;
 365 /** The size of the initial heap allocation required to make sure the first
366 * allocation is correctly aligned. */
367 uint32_t cbHeapAlignTweak;
368 /** The alignment tweak allocation address. */
369 void *pvAlignTweak;
370 /** @} */
371#endif
372
373#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
374 /** Pointer to the array of unwind info running parallel to aChunks (same
375 * allocation as this structure, located after the bitmaps).
376 * (For Windows, the structures must reside in 32-bit RVA distance to the
377 * actual chunk, so they are allocated off the chunk.) */
378 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
379#endif
380
381 /** The allocation chunks. */
382 RT_FLEXIBLE_ARRAY_EXTENSION
383 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
384} IEMEXECMEMALLOCATOR;
385/** Pointer to an executable memory allocator. */
386typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
387
388/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
389#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
390
391
392static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
393
394
395/**
396 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
397 * the heap statistics.
398 */
399static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
400 uint32_t cbReq, uint32_t idxChunk)
401{
402 pExecMemAllocator->cAllocations += 1;
403 pExecMemAllocator->cbAllocated += cbReq;
404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
405 pExecMemAllocator->cbFree -= cbReq;
406#else
407 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
408#endif
409 pExecMemAllocator->idxChunkHint = idxChunk;
410
411#ifdef RT_OS_DARWIN
412 /*
413 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
414 * on darwin. So, we mark the pages returned as read+write after alloc and
415 * expect the caller to call iemExecMemAllocatorReadyForUse when done
416 * writing to the allocation.
417 *
418 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
419 * for details.
420 */
421 /** @todo detect if this is necessary... it wasn't required on 10.15 or
422 * whatever older version it was. */
423 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
424 AssertRC(rc);
425#endif
426
427 return pvRet;
428}
429
430
431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
432static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
433 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
434{
435 /*
436 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
437 */
438 Assert(!(cToScan & 63));
439 Assert(!(idxFirst & 63));
440 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
441 pbmAlloc += idxFirst / 64;
442
443 /*
 444 * Scan the bitmap for cReqUnits of consecutive clear bits
445 */
446 /** @todo This can probably be done more efficiently for non-x86 systems. */
447 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
448 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
449 {
450 uint32_t idxAddBit = 1;
451 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
452 idxAddBit++;
453 if (idxAddBit >= cReqUnits)
454 {
455 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
456
457 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
458 pChunk->cFreeUnits -= cReqUnits;
459 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
460
461 void * const pvRet = (uint8_t *)pChunk->pvChunk
462 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
463
464 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
465 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
466 }
467
468 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
469 }
470 return NULL;
471}
472#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
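/* Worked example of the scan above (illustrative): with cReqUnits = 3 and a
 * bitmap word reading 0b11100011, ASMBitFirstClear returns iBit = 2, the inner
 * loop finds bits 3 and 4 clear as well, so ASMBitSetRange marks bits 2..4 as
 * allocated and the caller receives pvChunk + ((idxFirst + 2) << 7).  Had bit 3
 * been set, the outer loop would have continued from the next clear bit found
 * by ASMBitNextClear. */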
473
474
475static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
476{
477#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
478 /*
479 * Figure out how much to allocate.
480 */
481 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
482 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
483 {
484 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
485 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
486 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
487 {
488 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
489 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
490 if (pvRet)
491 return pvRet;
492 }
493 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
494 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
495 cReqUnits, idxChunk);
496 }
497#else
498 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
499 if (pvRet)
500 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
501#endif
502 return NULL;
503
504}
505
506
507/**
508 * Allocates @a cbReq bytes of executable memory.
509 *
510 * @returns Pointer to the memory, NULL if out of memory or other problem
511 * encountered.
512 * @param pVCpu The cross context virtual CPU structure of the calling
513 * thread.
514 * @param cbReq How many bytes are required.
515 */
516static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
517{
518 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
519 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
520 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
521
522 /*
523 * Adjust the request size so it'll fit the allocator alignment/whatnot.
524 *
525 * For the RTHeapSimple allocator this means to follow the logic described
526 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
527 * existing chunks if we think we've got sufficient free memory around.
528 *
529 * While for the alternative one we just align it up to a whole unit size.
530 */
531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
532 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
533#else
534 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
535#endif
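    /* Example (illustrative): a 1000 byte request becomes RT_ALIGN_32(1000, 128) = 1024
     * bytes with the alternative sub-allocator, whereas the RTHeapSimple path (32 byte
     * block header, see iemExecMemAllocatorGrow) would use
     *      RT_ALIGN_32(1000 + 32, 64) - 32 = 1088 - 32 = 1056 bytes
     * so that the next block header again lands on a 64 byte boundary. */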
536 if (cbReq <= pExecMemAllocator->cbFree)
537 {
538 uint32_t const cChunks = pExecMemAllocator->cChunks;
539 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
540 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
541 {
542 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
543 if (pvRet)
544 return pvRet;
545 }
546 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
547 {
548 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
549 if (pvRet)
550 return pvRet;
551 }
552 }
553
554 /*
555 * Can we grow it with another chunk?
556 */
557 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
558 {
559 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
560 AssertLogRelRCReturn(rc, NULL);
561
562 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
563 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
564 if (pvRet)
565 return pvRet;
566 AssertFailed();
567 }
568
569 /* What now? Prune native translation blocks from the cache? */
570 AssertFailed();
571 return NULL;
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
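/* Example (illustrative): freeing a 384 byte block starting 0x1180 bytes into a
 * chunk gives idxFirst = 0x1180 >> 7 = 35 and cReqUnits = 384 >> 7 = 3, so bits
 * 35..37 of that chunk's portion of pbmAlloc are checked and cleared, and
 * idxFreeHint is rewound to 35 for the next allocation attempt. */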
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
 694 * Note! We use a frame register approach here both because we have one,
 695 * but mainly because the UWOP_ALLOC_LARGE argument values
 696 * would be a pain to write initializers for. On the positive
 697 * side, we're impervious to changes in the stack variable
 698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
 702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 16 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
 766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
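/* Example (illustrative): for a 64 MiB chunk the single RUNTIME_FUNCTION entry
 * registered above covers RVAs [0, cbChunk), and UnwindInfoAddress is the RVA of
 * pInfo within that same chunk - which is why the unwind data must be allocated
 * from the chunk itself rather than the regular heap on Windows. */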
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
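/* Encoding examples for the two helpers above (illustrative):
 *      iemDwarfPutUleb128(Ptr, 300) emits 0xac 0x02 (low 7 bits 0x2c plus the
 *                                   continuation bit, then 300 >> 7 = 2),
 *      iemDwarfPutLeb128(Ptr, -8)   emits 0x78      (-8 & 0x3f, sign bit 0x40),
 *      iemDwarfPutLeb128(Ptr, 300)  emits 0xac 0x02 (same two byte form). */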
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
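/* Example (illustrative): with the data alignment factor of -8 established in
 * the CIE below, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits
 * DW_CFA_offset | DWREG_AMD64_RBP followed by ULEB128(2), i.e. "RBP was saved
 * at CFA + 2 * -8 = CFA - 16". */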
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
 882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
 931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
 947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
 951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
 952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
 988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
 1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
 1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
 1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
 1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
 1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
 1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
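/*
 * Worked sizing example (editor's illustration of the logic above; the byte
 * counts are made up and the rc variable is hypothetical):
 *
 *      rc = iemExecMemAllocatorInit(pVCpu, 40*_1M, 16*_1M, 0);
 *      // cbChunk: 40 MiB / 4 = 10 MiB, rounded up to a power of two -> 16 MiB.
 *      // cbMax:   rounded up to a chunk multiple -> 48 MiB, so cMaxChunks = 3.
 *      // The loop at the end then grows the allocator until at least 16 MiB
 *      // worth of chunks (here: one 16 MiB chunk) is available.
 */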
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#GP(0).
1583 */
1584IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1588#ifndef _MSC_VER
1589 return VINF_IEM_RAISED_XCPT; /* not reached */
1590#endif
1591}
1592
1593
1594/**
1595 * Reinitializes the native recompiler state.
1596 *
1597 * Called before starting a new recompile job.
1598 */
1599static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1600{
1601 pReNative->cLabels = 0;
1602 pReNative->bmLabelTypes = 0;
1603 pReNative->cFixups = 0;
1604#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1605 pReNative->pDbgInfo->cEntries = 0;
1606#endif
1607 pReNative->pTbOrg = pTb;
1608 pReNative->cCondDepth = 0;
1609 pReNative->uCondSeqNo = 0;
1610 pReNative->uCheckIrqSeqNo = 0;
1611
1612 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1613#if IEMNATIVE_HST_GREG_COUNT < 32
1614 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1615#endif
1616 ;
1617 pReNative->Core.bmHstRegsWithGstShadow = 0;
1618 pReNative->Core.bmGstRegShadows = 0;
1619 pReNative->Core.bmVars = 0;
1620 pReNative->Core.bmStack = 0;
1621 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1622 pReNative->Core.u64ArgVars = UINT64_MAX;
1623
1624 /* Full host register reinit: */
1625 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1626 {
1627 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1628 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1629 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1630 }
1631
1632 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1633 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1634#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1635 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1636#endif
1637#ifdef IEMNATIVE_REG_FIXED_TMP0
1638 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1639#endif
1640 );
1641 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1642 {
1643 fRegs &= ~RT_BIT_32(idxReg);
1644 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1645 }
1646
1647 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1648#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1649 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1650#endif
1651#ifdef IEMNATIVE_REG_FIXED_TMP0
1652 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1653#endif
1654 return pReNative;
1655}
1656
1657
1658/**
1659 * Allocates and initializes the native recompiler state.
1660 *
1661 * This is called the first time an EMT wants to recompile something.
1662 *
1663 * @returns Pointer to the new recompiler state.
1664 * @param pVCpu The cross context virtual CPU structure of the calling
1665 * thread.
1666 * @param pTb The TB that's about to be recompiled.
1667 * @thread EMT(pVCpu)
1668 */
1669static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1670{
1671 VMCPU_ASSERT_EMT(pVCpu);
1672
1673 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1674 AssertReturn(pReNative, NULL);
1675
1676 /*
1677 * Try allocate all the buffers and stuff we need.
1678 */
1679 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1680 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1681 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1682#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1683 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1684#endif
1685 if (RT_LIKELY( pReNative->pInstrBuf
1686 && pReNative->paLabels
1687 && pReNative->paFixups)
1688#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1689 && pReNative->pDbgInfo
1690#endif
1691 )
1692 {
1693 /*
1694 * Set the buffer & array sizes on success.
1695 */
1696 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1697 pReNative->cLabelsAlloc = _8K;
1698 pReNative->cFixupsAlloc = _16K;
1699#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1700 pReNative->cDbgInfoAlloc = _16K;
1701#endif
1702
1703 /*
1704 * Done, just need to save it and reinit it.
1705 */
1706 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1707 return iemNativeReInit(pReNative, pTb);
1708 }
1709
1710 /*
1711 * Failed. Cleanup and return.
1712 */
1713 AssertFailed();
1714 RTMemFree(pReNative->pInstrBuf);
1715 RTMemFree(pReNative->paLabels);
1716 RTMemFree(pReNative->paFixups);
1717#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1718 RTMemFree(pReNative->pDbgInfo);
1719#endif
1720 RTMemFree(pReNative);
1721 return NULL;
1722}
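/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source; the failure handling shown is just an example):
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (pReNative)
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *      {
 *          pReNative = iemNativeInit(pVCpu, pTb);
 *          AssertReturn(pReNative, NULL);
 *      }
 */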
1723
1724
1725/**
1726 * Creates a label.
1727 *
1728 * If the label does not yet have a defined position,
1729 * call iemNativeLabelDefine() later to set it.
1730 *
1731 * @returns Label ID. Throws VBox status code on failure, so no need to check
1732 * the return value.
1733 * @param pReNative The native recompile state.
1734 * @param enmType The label type.
1735 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1736 * label is not yet defined (default).
1737 * @param uData Data associated with the label. Only applicable to
1738 * certain types of labels. Default is zero.
1739 */
1740DECL_HIDDEN_THROW(uint32_t)
1741iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1742 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1743{
1744 /*
1745 * Locate existing label definition.
1746 *
1747 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1748 * and uData is zero.
1749 */
1750 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1751 uint32_t const cLabels = pReNative->cLabels;
1752 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1753#ifndef VBOX_STRICT
1754 && offWhere == UINT32_MAX
1755 && uData == 0
1756#endif
1757 )
1758 {
1759 /** @todo Since this is only used for labels with uData = 0, just use a
1760 * lookup array? */
1761 for (uint32_t i = 0; i < cLabels; i++)
1762 if ( paLabels[i].enmType == enmType
1763 && paLabels[i].uData == uData)
1764 {
1765#ifdef VBOX_STRICT
1766 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1767 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1768#endif
1769 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1770 return i;
1771 }
1772 }
1773
1774 /*
1775 * Make sure we've got room for another label.
1776 */
1777 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1778 { /* likely */ }
1779 else
1780 {
1781 uint32_t cNew = pReNative->cLabelsAlloc;
1782 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1783 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1784 cNew *= 2;
1785 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1786 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1787 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1788 pReNative->paLabels = paLabels;
1789 pReNative->cLabelsAlloc = cNew;
1790 }
1791
1792 /*
1793 * Define a new label.
1794 */
1795 paLabels[cLabels].off = offWhere;
1796 paLabels[cLabels].enmType = enmType;
1797 paLabels[cLabels].uData = uData;
1798 pReNative->cLabels = cLabels + 1;
1799
1800 Assert((unsigned)enmType < 64);
1801 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1802
1803 if (offWhere != UINT32_MAX)
1804 {
1805#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1806 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1807 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1808#endif
1809 }
1810 return cLabels;
1811}
1812
1813
1814/**
1815 * Defines the location of an existing label.
1816 *
1817 * @param pReNative The native recompile state.
1818 * @param idxLabel The label to define.
1819 * @param offWhere The position.
1820 */
1821DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1822{
1823 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1824 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1825 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1826 pLabel->off = offWhere;
1827#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1828 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1829 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1830#endif
1831}
1832
1833
1834/**
1835 * Looks up a label.
1836 *
1837 * @returns Label ID if found, UINT32_MAX if not.
1838 */
1839static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1840 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1841{
1842 Assert((unsigned)enmType < 64);
1843 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1844 {
1845 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1846 uint32_t const cLabels = pReNative->cLabels;
1847 for (uint32_t i = 0; i < cLabels; i++)
1848 if ( paLabels[i].enmType == enmType
1849 && paLabels[i].uData == uData
1850 && ( paLabels[i].off == offWhere
1851 || offWhere == UINT32_MAX
1852 || paLabels[i].off == UINT32_MAX))
1853 return i;
1854 }
1855 return UINT32_MAX;
1856}
1857
1858
1859/**
1860 * Adds a fixup.
1861 *
1862 * @throws VBox status code (int) on failure.
1863 * @param pReNative The native recompile state.
1864 * @param offWhere The instruction offset of the fixup location.
1865 * @param idxLabel The target label ID for the fixup.
1866 * @param enmType The fixup type.
1867 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1868 */
1869DECL_HIDDEN_THROW(void)
1870iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1871 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1872{
1873 Assert(idxLabel <= UINT16_MAX);
1874 Assert((unsigned)enmType <= UINT8_MAX);
1875
1876 /*
1877 * Make sure we've room.
1878 */
1879 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1880 uint32_t const cFixups = pReNative->cFixups;
1881 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1882 { /* likely */ }
1883 else
1884 {
1885 uint32_t cNew = pReNative->cFixupsAlloc;
1886 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1887 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1888 cNew *= 2;
1889 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1890 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1891 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1892 pReNative->paFixups = paFixups;
1893 pReNative->cFixupsAlloc = cNew;
1894 }
1895
1896 /*
1897 * Add the fixup.
1898 */
1899 paFixups[cFixups].off = offWhere;
1900 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1901 paFixups[cFixups].enmType = enmType;
1902 paFixups[cFixups].offAddend = offAddend;
1903 pReNative->cFixups = cFixups + 1;
1904}
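/*
 * Sketch of the typical label/fixup flow (editor's addition; the label type,
 * fixup type and offPatch offset named below are placeholders, not actual
 * identifiers from this file):
 *
 *      // Forward declare the target label.
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Xxx);
 *      // ... emit the branch instruction, then record a fixup at the code
 *      //     buffer offset of the field that needs patching ...
 *      iemNativeAddFixup(pReNative, offPatch, idxLabel, kIemNativeFixupType_Xxx);
 *      // ... later, once the target position is known ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 *
 * The final assembly pass then patches each recorded fixup location so it
 * references paLabels[fixup.idxLabel].off plus the addend.
 */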
1905
1906
1907/**
1908 * Slow code path for iemNativeInstrBufEnsure.
1909 */
1910DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1911{
1912 /* Double the buffer size till we meet the request. */
1913 uint32_t cNew = pReNative->cInstrBufAlloc;
1914 AssertReturn(cNew > 0, NULL);
1915 do
1916 cNew *= 2;
1917 while (cNew < off + cInstrReq);
1918
1919 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1920#ifdef RT_ARCH_ARM64
1921 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1922#else
1923 uint32_t const cbMaxInstrBuf = _2M;
1924#endif
1925 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1926
1927 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1928 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1929
1930 pReNative->cInstrBufAlloc = cNew;
1931 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1932}
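/*
 * Caller pattern sketch (editor's addition): emitters are expected to go
 * through the inlined iemNativeInstrBufEnsure() fast path, which only drops
 * into the slow path above when the buffer is too small. Roughly:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
 *      pCodeBuf[off++] = ...;   // emit up to the 16 requested instruction units
 *
 * (The exact fast-path signature lives in the recompiler header and is assumed
 * here to mirror the slow path.)
 */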
1933
1934#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1935
1936/**
1937 * Grows the static debug info array used during recompilation.
1938 *
1939 * @returns Pointer to the new debug info block; throws VBox status code on
1940 * failure, so no need to check the return value.
1941 */
1942DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1943{
1944 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1945 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1946 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1947 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1948 pReNative->pDbgInfo = pDbgInfo;
1949 pReNative->cDbgInfoAlloc = cNew;
1950 return pDbgInfo;
1951}
1952
1953
1954/**
1955 * Adds a new, uninitialized debug info entry, returning a pointer to it.
1956 */
1957DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1958{
1959 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1960 { /* likely */ }
1961 else
1962 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1963 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1964}
1965
1966
1967/**
1968 * Debug Info: Adds a native offset record, if necessary.
1969 */
1970static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1971{
1972 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1973
1974 /*
1975 * Search backwards to see if we've got a similar record already.
1976 */
1977 uint32_t idx = pDbgInfo->cEntries;
1978 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1979 while (idx-- > idxStop)
1980 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1981 {
1982 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1983 return;
1984 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1985 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1986 break;
1987 }
1988
1989 /*
1990 * Add it.
1991 */
1992 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1993 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1994 pEntry->NativeOffset.offNative = off;
1995}
1996
1997
1998/**
1999 * Debug Info: Record info about a label.
2000 */
2001static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2002{
2003 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2004 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2005 pEntry->Label.uUnused = 0;
2006 pEntry->Label.enmLabel = (uint8_t)enmType;
2007 pEntry->Label.uData = uData;
2008}
2009
2010
2011/**
2012 * Debug Info: Record info about a threaded call.
2013 */
2014static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2015{
2016 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2017 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2018 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2019 pEntry->ThreadedCall.uUnused = 0;
2020 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2021}
2022
2023
2024/**
2025 * Debug Info: Record info about a new guest instruction.
2026 */
2027static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2028{
2029 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2030 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2031 pEntry->GuestInstruction.uUnused = 0;
2032 pEntry->GuestInstruction.fExec = fExec;
2033}
2034
2035
2036/**
2037 * Debug Info: Record info about guest register shadowing.
2038 */
2039static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2040 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2041{
2042 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2043 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2044 pEntry->GuestRegShadowing.uUnused = 0;
2045 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2046 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2047 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2048}
2049
2050#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2051
2052
2053/*********************************************************************************************************************************
2054* Register Allocator *
2055*********************************************************************************************************************************/
2056
2057/**
2058 * Register parameter indexes (indexed by argument number).
2059 */
2060DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2061{
2062 IEMNATIVE_CALL_ARG0_GREG,
2063 IEMNATIVE_CALL_ARG1_GREG,
2064 IEMNATIVE_CALL_ARG2_GREG,
2065 IEMNATIVE_CALL_ARG3_GREG,
2066#if defined(IEMNATIVE_CALL_ARG4_GREG)
2067 IEMNATIVE_CALL_ARG4_GREG,
2068# if defined(IEMNATIVE_CALL_ARG5_GREG)
2069 IEMNATIVE_CALL_ARG5_GREG,
2070# if defined(IEMNATIVE_CALL_ARG6_GREG)
2071 IEMNATIVE_CALL_ARG6_GREG,
2072# if defined(IEMNATIVE_CALL_ARG7_GREG)
2073 IEMNATIVE_CALL_ARG7_GREG,
2074# endif
2075# endif
2076# endif
2077#endif
2078};
2079
2080/**
2081 * Call register masks indexed by argument count.
2082 */
2083DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2084{
2085 0,
2086 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2087 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2088 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2089 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2090 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2091#if defined(IEMNATIVE_CALL_ARG4_GREG)
2092 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2093 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2094# if defined(IEMNATIVE_CALL_ARG5_GREG)
2095 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2096 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2097# if defined(IEMNATIVE_CALL_ARG6_GREG)
2098 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2099 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2100 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2101# if defined(IEMNATIVE_CALL_ARG7_GREG)
2102 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2103 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2104 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2105# endif
2106# endif
2107# endif
2108#endif
2109};
2110
2111#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2112/**
2113 * BP offset of the stack argument slots.
2114 *
2115 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2116 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2117 */
2118DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2119{
2120 IEMNATIVE_FP_OFF_STACK_ARG0,
2121# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2122 IEMNATIVE_FP_OFF_STACK_ARG1,
2123# endif
2124# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2125 IEMNATIVE_FP_OFF_STACK_ARG2,
2126# endif
2127# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2128 IEMNATIVE_FP_OFF_STACK_ARG3,
2129# endif
2130};
2131AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2132#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2133
2134/**
2135 * Info about shadowed guest register values.
2136 * @see IEMNATIVEGSTREG
2137 */
2138static struct
2139{
2140 /** Offset in VMCPU. */
2141 uint32_t off;
2142 /** The field size. */
2143 uint8_t cb;
2144 /** Name (for logging). */
2145 const char *pszName;
2146} const g_aGstShadowInfo[] =
2147{
2148#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2151 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2152 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2153 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2154 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2155 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2156 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2157 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2158 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2159 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2160 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2161 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2162 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2163 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2164 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2165 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2166 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2167 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2168 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2169 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2170 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2171 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2172 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2173 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2174 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2175 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2176 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2177 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2178 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2179 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2180 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2181 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2182 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2183 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2184 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2185#undef CPUMCTX_OFF_AND_SIZE
2186};
2187AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
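/*
 * Consumption example for the table above (editor's addition): an emitter that
 * loads or checks a shadow copy looks up the CPUMCTX offset and field size by
 * guest register enum, e.g. for the guest RIP:
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[kIemNativeGstReg_Pc].off;  // RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip)
 *      uint8_t  const cbField = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;   // 8
 */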
2188
2189
2190/** Host CPU general purpose register names. */
2191DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2192{
2193#ifdef RT_ARCH_AMD64
2194 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2195#elif defined(RT_ARCH_ARM64)
2196 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2197 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2198#else
2199# error "port me"
2200#endif
2201};
2202
2203
2204DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2205 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2206{
2207 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2208
2209 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2210 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2211 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2212 return (uint8_t)idxReg;
2213}
2214
2215
2216/**
2217 * Tries to locate a suitable register in the given register mask.
2218 *
2219 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2220 * failed.
2221 *
2222 * @returns Host register number on success, returns UINT8_MAX on failure.
2223 */
2224static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2225{
2226 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2227 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2228 if (fRegs)
2229 {
2230 /** @todo pick better here: */
2231 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2232
2233 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2234 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2235 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2236 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2237
2238 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2239 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2240 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2241 return idxReg;
2242 }
2243 return UINT8_MAX;
2244}
2245
2246
2247/**
2248 * Locate a register, possibly freeing one up.
2249 *
2250 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2251 * failed.
2252 *
2253 * @returns Host register number on success. Returns UINT8_MAX if no registers
2254 * found, the caller is supposed to deal with this and raise an
2255 * allocation type specific status code (if desired).
2256 *
2257 * @throws VBox status code if we run into trouble spilling a variable or
2258 * recording debug info. Does NOT throw anything if we're out of
2259 * registers, though.
2260 */
2261static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2262 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2263{
2264 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2265 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2266
2267 /*
2268 * Try a freed register that's shadowing a guest register
2269 */
2270 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2271 if (fRegs)
2272 {
2273 unsigned const idxReg = (fPreferVolatile
2274 ? ASMBitFirstSetU32(fRegs)
2275 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2276 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2277 - 1;
2278
2279 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2280 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2281 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2282 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2283
2284 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2285 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2286 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2287 return idxReg;
2288 }
2289
2290 /*
2291 * Try free up a variable that's in a register.
2292 *
2293 * We do two rounds here, first evacuating variables we don't need to be
2294 * saved on the stack, then in the second round move things to the stack.
2295 */
2296 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2297 {
2298 uint32_t fVars = pReNative->Core.bmVars;
2299 while (fVars)
2300 {
2301 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2302 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2303 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2304 && (RT_BIT_32(idxReg) & fRegMask)
2305 && ( iLoop == 0
2306 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2307 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2308 {
2309 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2310 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2311 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2312 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2313 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2314 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2315
2316 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2317 {
2318 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
2319 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2320 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeVarCalcBpDisp(pReNative, idxVar), idxReg);
2321 }
2322
2323 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2324 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2325 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2326 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2327 return idxReg;
2328 }
2329 fVars &= ~RT_BIT_32(idxVar);
2330 }
2331 }
2332
2333 return UINT8_MAX;
2334}
2335
2336
2337/**
2338 * Moves a variable to a different register or spills it onto the stack.
2339 *
2340 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2341 * kinds can easily be recreated if needed later.
2342 *
2343 * @returns The new code buffer position, UINT32_MAX on failure.
2344 * @param pReNative The native recompile state.
2345 * @param off The current code buffer position.
2346 * @param idxVar The variable index.
2347 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2348 * call-volatile registers.
2349 */
2350static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2351 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2352{
2353 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2354 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2355
2356 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2357 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2358 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2359 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2360 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2361 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2362 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2363 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2364 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2365
2366
2367 /** @todo Add statistics on this.*/
2368 /** @todo Implement basic variable liveness analysis (python) so variables
2369 * can be freed immediately once no longer used. This has the potential to
2370 * be trashing registers and stack for dead variables. */
2371
2372 /*
2373 * First try move it to a different register, as that's cheaper.
2374 */
2375 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2376 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2377 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2378 if (fRegs)
2379 {
2380 /* Avoid using shadow registers, if possible. */
2381 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2382 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2383 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2384 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2385
2386 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2387 Log12(("iemNativeRegMoveOrSpillStackVar: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
2388 idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2389 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2390 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2391 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2392 if (fGstRegShadows)
2393 {
2394 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2395 while (fGstRegShadows)
2396 {
2397 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2398 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2399
2400 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2401 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2402 }
2403 }
2404
2405 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2406 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2407 }
2408 /*
2409 * Otherwise we must spill the register onto the stack.
2410 */
2411 else
2412 {
2413 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
2414 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
2415 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
2416 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2417 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
2418
2419 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2420 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2421 }
2422
2423 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2424 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2425 return off;
2426}
2427
2428
2429/**
2430 * Allocates a temporary host general purpose register.
2431 *
2432 * This may emit code to save register content onto the stack in order to free
2433 * up a register.
2434 *
2435 * @returns The host register number; throws VBox status code on failure,
2436 * so no need to check the return value.
2437 * @param pReNative The native recompile state.
2438 * @param poff Pointer to the variable with the code buffer position.
2439 * This will be updated if we need to move a variable from
2440 * register to stack in order to satisfy the request.
2441 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2442 * registers (@c true, default) or the other way around
2443 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2444 */
2445DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2446{
2447 /*
2448 * Try find a completely unused register, preferably a call-volatile one.
2449 */
2450 uint8_t idxReg;
2451 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2452 & ~pReNative->Core.bmHstRegsWithGstShadow
2453 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2454 if (fRegs)
2455 {
2456 if (fPreferVolatile)
2457 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2458 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2459 else
2460 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2461 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2462 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2463 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2464 }
2465 else
2466 {
2467 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2468 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2469 }
2470 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2471}
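/*
 * Usage sketch (editor's addition; iemNativeRegFreeTmp is assumed to be the
 * matching release helper and uSomeValue is a placeholder):
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, uSomeValue);
 *      // ... use idxRegTmp as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */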
2472
2473
2474/**
2475 * Allocates a temporary register for loading an immediate value into.
2476 *
2477 * This will emit code to load the immediate, unless there happens to be an
2478 * unused register with the value already loaded.
2479 *
2480 * The caller will not modify the returned register, it must be considered
2481 * read-only. Free using iemNativeRegFreeTmpImm.
2482 *
2483 * @returns The host register number; throws VBox status code on failure, so no
2484 * need to check the return value.
2485 * @param pReNative The native recompile state.
2486 * @param poff Pointer to the variable with the code buffer position.
2487 * @param uImm The immediate value that the register must hold upon
2488 * return.
2489 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2490 * registers (@c true, default) or the other way around
2491 * (@c false).
2492 *
2493 * @note Reusing immediate values has not been implemented yet.
2494 */
2495DECL_HIDDEN_THROW(uint8_t)
2496iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2497{
2498 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2499 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2500 return idxReg;
2501}
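/*
 * Usage sketch (editor's addition): the register is read-only to the caller
 * and must be released with iemNativeRegFreeTmpImm() as noted above.
 *
 *      uint8_t const idxRegMask = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffffffffffff0000));
 *      // ... use idxRegMask as a source operand only ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegMask);
 */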
2502
2503
2504/**
2505 * Marks host register @a idxHstReg as containing a shadow copy of guest
2506 * register @a enmGstReg.
2507 *
2508 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2509 * host register before calling.
2510 */
2511DECL_FORCE_INLINE(void)
2512iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2513{
2514 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2515 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2516
2517 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2518 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2519 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2520 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2521#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2522 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2523 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2524#else
2525 RT_NOREF(off);
2526#endif
2527}
2528
2529
2530/**
2531 * Clear any guest register shadow claims from @a idxHstReg.
2532 *
2533 * The register does not need to be shadowing any guest registers.
2534 */
2535DECL_FORCE_INLINE(void)
2536iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2537{
2538 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2539 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
2540 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2541 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2542 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2543
2544#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2545 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2546 if (fGstRegs)
2547 {
2548 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
2549 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2550 while (fGstRegs)
2551 {
2552 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2553 fGstRegs &= ~RT_BIT_64(iGstReg);
2554 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2555 }
2556 }
2557#else
2558 RT_NOREF(off);
2559#endif
2560
2561 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2562 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2563 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2564}
2565
2566
2567/**
2568 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
2569 * and global overview flags.
2570 */
2571DECL_FORCE_INLINE(void)
2572iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2573{
2574 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2575 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2576 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
2577 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2578 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
2579 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2580 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2581
2582#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2583 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2584 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
2585#else
2586 RT_NOREF(off);
2587#endif
2588
2589 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
2590 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
2591 if (!fGstRegShadowsNew)
2592 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2593 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
2594}
2595
2596
2597/**
2598 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2599 * to @a idxRegTo.
2600 */
2601DECL_FORCE_INLINE(void)
2602iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2603 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2604{
2605 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2606 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2607 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
2608 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2609 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
2610 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
2611 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2612 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2613
2614 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
2615 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
2616 if (!fGstRegShadowsFrom)
2617 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
2618 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
2619 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
2620 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2621#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2622 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2623 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2624#else
2625 RT_NOREF(off);
2626#endif
2627}
2628
2629
2630/**
2631 * Allocates a temporary host general purpose register for keeping a guest
2632 * register value.
2633 *
2634 * Since we may already have a register holding the guest register value,
2635 * code will be emitted to do the loading if that's not the case. Code may also
2636 * be emitted if we have to free up a register to satisfy the request.
2637 *
2638 * @returns The host register number; throws VBox status code on failure, so no
2639 * need to check the return value.
2640 * @param pReNative The native recompile state.
2641 * @param poff Pointer to the variable with the code buffer
2642 * position. This will be updated if we need to move a
2643 * variable from register to stack in order to satisfy
2644 * the request.
2645 * @param enmGstReg The guest register that is to be updated.
2646 * @param enmIntendedUse How the caller will be using the host register.
2647 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2648 */
2649DECL_HIDDEN_THROW(uint8_t)
2650iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2651 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2652{
2653 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2654#ifdef LOG_ENABLED
2655 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
2656#endif
2657
2658 /*
2659 * First check if the guest register value is already in a host register.
2660 */
2661 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2662 {
2663 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2664 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2665 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2666 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2667
2668 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2669 {
2670 /*
2671 * If the register will trash the guest shadow copy, try find a
2672 * completely unused register we can use instead. If that fails,
2673 * we need to disassociate the host reg from the guest reg.
2674 */
2675 /** @todo would be nice to know if preserving the register is in any way helpful. */
2676 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2677 && ( ~pReNative->Core.bmHstRegs
2678 & ~pReNative->Core.bmHstRegsWithGstShadow
2679 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2680 {
2681 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2682
2683 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2684
2685 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2686 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2687 g_apszIemNativeHstRegNames[idxRegNew]));
2688 idxReg = idxRegNew;
2689 }
2690 else
2691 {
2692 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2693 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2694 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2695 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2696 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2697 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2698 else
2699 {
2700 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2701 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2702 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2703 }
2704 }
2705 }
2706 else
2707 {
2708 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
2709 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
2710 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
2711 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
2712
2713 /*
2714 * Allocate a new register, copy the value and, if updating, the
2715 * guest shadow copy assignment to the new register.
2716 */
2717 /** @todo share register for readonly access. */
2718 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2719
2720 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2721 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2722
2723 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
2724 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2725 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2726 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2727 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2728 else
2729 {
2730 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2731 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
2732 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2733 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2734 }
2735 idxReg = idxRegNew;
2736 }
2737
2738#ifdef VBOX_STRICT
2739 /* Strict builds: Check that the value is correct. */
2740 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2741#endif
2742
2743 return idxReg;
2744 }
2745
2746 /*
2747 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
2748 */
2749 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2750
2751 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
2752 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2753
2754 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2755 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2756 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2757 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2758
2759 return idxRegNew;
2760}
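/*
 * Usage sketch (editor's addition; the emitted operation is illustrative):
 *
 *      uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code that updates idxRegRax; the register keeps shadowing
 *      //     guest RAX, so the result can be written back to CPUMCTX later ...
 */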
2761
2762
2763/**
2764 * Allocates a temporary host general purpose register that already holds the
2765 * given guest register value.
2766 *
2767 * The use case for this function is places where the shadowing state cannot be
2768 * modified due to branching and such. This will fail if we don't have a
2769 * current shadow copy handy or if it's incompatible. The only code that will
2770 * be emitted here is value checking code in strict builds.
2771 *
2772 * The intended use can only be readonly!
2773 *
2774 * @returns The host register number, UINT8_MAX if not present.
2775 * @param pReNative The native recompile state.
2776 * @param poff Pointer to the instruction buffer offset.
2777 * Will be updated in strict builds if a register is
2778 * found.
2779 * @param enmGstReg The guest register that is to be accessed (read-only).
2780 * @note In strict builds, this may throw instruction buffer growth failures.
2781 * Non-strict builds will not throw anything.
2782 * @sa iemNativeRegAllocTmpForGuestReg
2783 */
2784DECL_HIDDEN_THROW(uint8_t)
2785iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2786{
2787 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2788
2789 /*
2790 * First check if the guest register value is already in a host register.
2791 */
2792 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2793 {
2794 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2795 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2796 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2797 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2798
2799 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2800 {
2801 /*
2802 * We only do readonly use here, so easy compared to the other
2803 * variant of this code.
2804 */
2805 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2806 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2807 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2808 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2809 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2810
2811#ifdef VBOX_STRICT
2812 /* Strict builds: Check that the value is correct. */
2813 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2814#else
2815 RT_NOREF(poff);
2816#endif
2817 return idxReg;
2818 }
2819 }
2820
2821 return UINT8_MAX;
2822}
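/*
 * Usage sketch (editor's addition): callers must handle the UINT8_MAX case,
 * typically by falling back to code that fetches the value from CPUMCTX:
 *
 *      uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
 *                                                                                kIemNativeGstReg_EFlags);
 *      if (idxRegEfl != UINT8_MAX)
 *      {
 *          // ... read-only use of the shadow copy ...
 *      }
 *      else
 *      {
 *          // ... emit code reading pVCpu->cpum.GstCtx.eflags directly ...
 *      }
 */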
2823
2824
2825DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2826
2827
2828/**
2829 * Allocates argument registers for a function call.
2830 *
2831 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2832 * need to check the return value.
2833 * @param pReNative The native recompile state.
2834 * @param off The current code buffer offset.
2835 * @param cArgs The number of arguments the function call takes.
2836 */
2837DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2838{
2839 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2840 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2841 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2842 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2843
2844 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2845 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2846 else if (cArgs == 0)
2847         return off;
2848
2849 /*
2850     * Do we get lucky and all registers are free and not shadowing anything?
2851 */
2852 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2853 for (uint32_t i = 0; i < cArgs; i++)
2854 {
2855 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2856 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2857 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2858 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2859 }
2860 /*
2861 * Okay, not lucky so we have to free up the registers.
2862 */
2863 else
2864 for (uint32_t i = 0; i < cArgs; i++)
2865 {
2866 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2867 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2868 {
2869 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2870 {
2871 case kIemNativeWhat_Var:
2872 {
2873 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2874 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2875 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2876 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2877 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2878
2879 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2880 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2881 else
2882 {
2883 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2884 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2885 }
2886 break;
2887 }
2888
2889 case kIemNativeWhat_Tmp:
2890 case kIemNativeWhat_Arg:
2891 case kIemNativeWhat_rc:
2892 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2893 default:
2894 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2895 }
2896
2897 }
2898 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2899 {
2900 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2901 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2902 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2903 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2904 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2905 }
2906 else
2907 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2908 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2909 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2910 }
2911 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2912     return off;
2913}
2914
2915
2916DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2917
2918
2919#if 0
2920/**
2921 * Frees a register assignment of any type.
2922 *
2923 * @param pReNative The native recompile state.
2924 * @param idxHstReg The register to free.
2925 *
2926 * @note Does not update variables.
2927 */
2928DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2929{
2930 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2931 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2932 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2933 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2934 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2935 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2936 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2937 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2938 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2939 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2940 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2941 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2942 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2943 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2944
2945 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2946 /* no flushing, right:
2947 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2948 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2949 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2950 */
2951}
2952#endif
2953
2954
2955/**
2956 * Frees a temporary register.
2957 *
2958 * Any shadow copies of guest registers assigned to the host register will not
2959 * be flushed by this operation.
2960 */
2961DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2962{
2963 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2964 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2965 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2966 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2967 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2968}
2969
2970
2971/**
2972 * Frees a temporary immediate register.
2973 *
2974 * It is assumed that the call has not modified the register, so it still holds
2975 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2976 */
2977DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2978{
2979 iemNativeRegFreeTmp(pReNative, idxHstReg);
2980}
2981
2982
2983/**
2984 * Called right before emitting a call instruction to move anything important
2985 * out of call-volatile registers, free and flush the call-volatile registers,
2986 * optionally freeing argument variables.
2987 *
2988 * @returns New code buffer offset; throws VBox status code on error.
2989 * @param pReNative The native recompile state.
2990 * @param off The code buffer offset.
2991 * @param cArgs The number of arguments the function call takes.
2992 * It is presumed that the host register part of these has
2993 * been allocated as such already and won't need moving,
2994 * just freeing.
2995 */
2996DECL_HIDDEN_THROW(uint32_t)
2997iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2998{
2999 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
3000
3001 /*
3002 * Move anything important out of volatile registers.
3003 */
3004 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3005 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3006 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
3007#ifdef IEMNATIVE_REG_FIXED_TMP0
3008 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3009#endif
3010 & ~g_afIemNativeCallRegs[cArgs];
3011
3012 fRegsToMove &= pReNative->Core.bmHstRegs;
3013 if (!fRegsToMove)
3014 { /* likely */ }
3015 else
3016 {
3017 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
3018 while (fRegsToMove != 0)
3019 {
3020 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
3021 fRegsToMove &= ~RT_BIT_32(idxReg);
3022
3023 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3024 {
3025 case kIemNativeWhat_Var:
3026 {
3027 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3028 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
3029 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
3030 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
3031 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
3032 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
3033 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3034 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3035 else
3036 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3037 continue;
3038 }
3039
3040 case kIemNativeWhat_Arg:
3041 AssertMsgFailed(("What?!?: %u\n", idxReg));
3042 continue;
3043
3044 case kIemNativeWhat_rc:
3045 case kIemNativeWhat_Tmp:
3046 AssertMsgFailed(("Missing free: %u\n", idxReg));
3047 continue;
3048
3049 case kIemNativeWhat_FixedTmp:
3050 case kIemNativeWhat_pVCpuFixed:
3051 case kIemNativeWhat_pCtxFixed:
3052 case kIemNativeWhat_FixedReserved:
3053 case kIemNativeWhat_Invalid:
3054 case kIemNativeWhat_End:
3055 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3056 }
3057 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3058 }
3059 }
3060
3061 /*
3062 * Do the actual freeing.
3063 */
3064 if (pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3065 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n", pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3066 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3067
3068 /* If there are guest register shadows in any call-volatile register, we
3069        have to clear the corresponding guest register masks for each register. */
3070 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3071 if (fHstRegsWithGstShadow)
3072 {
3073 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3074 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
3075 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3076 do
3077 {
3078 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3079 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3080
3081 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
3082 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3083 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3084 } while (fHstRegsWithGstShadow != 0);
3085 }
3086
3087 return off;
3088}
3089
3090
3091/**
3092 * Flushes a set of guest register shadow copies.
3093 *
3094 * This is usually done after calling a threaded function or a C-implementation
3095 * of an instruction.
3096 *
3097 * @param pReNative The native recompile state.
3098 * @param fGstRegs Set of guest registers to flush.
3099 */
3100DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3101{
3102 /*
3103 * Reduce the mask by what's currently shadowed
3104 */
3105 fGstRegs &= pReNative->Core.bmGstRegShadows;
3106 if (fGstRegs)
3107 {
3108 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n",
3109 fGstRegs, pReNative->Core.bmGstRegShadows, pReNative->Core.bmGstRegShadows & ~fGstRegs));
3110 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3111 if (pReNative->Core.bmGstRegShadows)
3112 {
3113 /*
3114 * Partial.
3115 */
3116 do
3117 {
3118 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3119 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3120 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3121 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3122 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3123
3124 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3125 fGstRegs &= ~fInThisHstReg;
3126 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3127 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3128 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3129 } while (fGstRegs != 0);
3130 }
3131 else
3132 {
3133 /*
3134 * Clear all.
3135 */
3136 do
3137 {
3138 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3139 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3140 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3141 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3142 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3143
3144 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3145 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3146 } while (fGstRegs != 0);
3147 pReNative->Core.bmHstRegsWithGstShadow = 0;
3148 }
3149 }
3150}
3151
3152
3153/**
3154 * Flushes delayed write of a specific guest register.
3155 *
3156 * This must be called prior to calling CImpl functions and any helpers that use
3157 * the guest state (like raising exceptions) and such.
3158 *
3159 * This optimization has not yet been implemented. The first target would be
3160 * RIP updates, since these are the most common ones.
3161 */
3162DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3163 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
3164{
3165 RT_NOREF(pReNative, enmClass, idxReg);
3166 return off;
3167}
3168
3169
3170/**
3171 * Flushes any delayed guest register writes.
3172 *
3173 * This must be called prior to calling CImpl functions and any helpers that use
3174 * the guest state (like raising exceptions) and such.
3175 *
3176 * This optimization has not yet been implemented. The first target would be
3177 * RIP updates, since these are the most common ones.
3178 */
3179DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3180{
3181 RT_NOREF(pReNative, off);
3182 return off;
3183}
3184
3185
3186#ifdef VBOX_STRICT
3187/**
3188 * Does internal register allocator sanity checks.
3189 */
3190static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
3191{
3192 /*
3193 * Iterate host registers building a guest shadowing set.
3194 */
3195 uint64_t bmGstRegShadows = 0;
3196 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
3197 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
3198 while (bmHstRegsWithGstShadow)
3199 {
3200 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
3201 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3202 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3203
3204 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3205 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
3206 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
3207 bmGstRegShadows |= fThisGstRegShadows;
3208 while (fThisGstRegShadows)
3209 {
3210 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
3211 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
3212 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
3213 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
3214 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
3215 }
3216 }
3217 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
3218 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
3219 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
3220
3221 /*
3222 * Now the other way around, checking the guest to host index array.
3223 */
3224 bmHstRegsWithGstShadow = 0;
3225 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
3226 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3227 while (bmGstRegShadows)
3228 {
3229 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
3230 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3231 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
3232
3233 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3234 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
3235 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
3236 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
3237 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3238 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3239 }
3240 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
3241 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
3242 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
3243}
3244#endif
3245
3246
3247/*********************************************************************************************************************************
3248* Code Emitters (larger snippets) *
3249*********************************************************************************************************************************/
3250
3251/**
3252 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3253 * extending to 64-bit width.
3254 *
3255 * @returns New code buffer offset on success; throws VBox status code on error.
3256 * @param pReNative The native recompile state.
3257 * @param off The current code buffer position.
3258 * @param idxHstReg The host register to load the guest register value into.
3259 * @param enmGstReg The guest register to load.
3260 *
3261 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3262 * that is something the caller needs to do if applicable.
3263 */
3264DECL_HIDDEN_THROW(uint32_t)
3265iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3266{
3267 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3268 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3269
3270 switch (g_aGstShadowInfo[enmGstReg].cb)
3271 {
3272 case sizeof(uint64_t):
3273 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3274 case sizeof(uint32_t):
3275 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3276 case sizeof(uint16_t):
3277 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3278#if 0 /* not present in the table. */
3279 case sizeof(uint8_t):
3280 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3281#endif
3282 default:
3283 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3284 }
3285}
3286
3287
3288#ifdef VBOX_STRICT
3289/**
3290 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
3291 *
3292 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3293 * Trashes EFLAGS on AMD64.
3294 */
3295static uint32_t
3296iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
3297{
3298# ifdef RT_ARCH_AMD64
3299 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3300
3301 /* rol reg64, 32 */
3302 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3303 pbCodeBuf[off++] = 0xc1;
3304 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3305 pbCodeBuf[off++] = 32;
3306
3307 /* test reg32, ffffffffh */
3308 if (idxReg >= 8)
3309 pbCodeBuf[off++] = X86_OP_REX_B;
3310 pbCodeBuf[off++] = 0xf7;
3311 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3312 pbCodeBuf[off++] = 0xff;
3313 pbCodeBuf[off++] = 0xff;
3314 pbCodeBuf[off++] = 0xff;
3315 pbCodeBuf[off++] = 0xff;
3316
3317 /* je/jz +1 */
3318 pbCodeBuf[off++] = 0x74;
3319 pbCodeBuf[off++] = 0x01;
3320
3321 /* int3 */
3322 pbCodeBuf[off++] = 0xcc;
3323
3324 /* rol reg64, 32 */
3325 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3326 pbCodeBuf[off++] = 0xc1;
3327 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3328 pbCodeBuf[off++] = 32;
3329
3330# elif defined(RT_ARCH_ARM64)
3331 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3332 /* lsr tmp0, reg64, #32 */
3333 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
3334 /* cbz tmp0, +1 */
3335 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3336 /* brk #0x1100 */
3337 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
3338
3339# else
3340# error "Port me!"
3341# endif
3342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3343 return off;
3344}
3345#endif /* VBOX_STRICT */
3346
3347
3348#ifdef VBOX_STRICT
3349/**
3350 * Emits code that checks that the content of register @a idxReg is the same
3351 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
3352 * instruction if that's not the case.
3353 *
3354 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3355 * Trashes EFLAGS on AMD64.
3356 */
3357static uint32_t
3358iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3359{
3360# ifdef RT_ARCH_AMD64
3361 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3362
3363 /* cmp reg, [mem] */
3364 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3365 {
3366 if (idxReg >= 8)
3367 pbCodeBuf[off++] = X86_OP_REX_R;
3368 pbCodeBuf[off++] = 0x38;
3369 }
3370 else
3371 {
3372 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3373 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3374 else
3375 {
3376 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3377 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3378 else
3379 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3380 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3381 if (idxReg >= 8)
3382 pbCodeBuf[off++] = X86_OP_REX_R;
3383 }
3384 pbCodeBuf[off++] = 0x39;
3385 }
3386 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3387
3388 /* je/jz +1 */
3389 pbCodeBuf[off++] = 0x74;
3390 pbCodeBuf[off++] = 0x01;
3391
3392 /* int3 */
3393 pbCodeBuf[off++] = 0xcc;
3394
3395 /* For values smaller than the register size, we must check that the rest
3396 of the register is all zeros. */
3397 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3398 {
3399 /* test reg64, imm32 */
3400 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3401 pbCodeBuf[off++] = 0xf7;
3402 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3403 pbCodeBuf[off++] = 0;
3404 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3405 pbCodeBuf[off++] = 0xff;
3406 pbCodeBuf[off++] = 0xff;
3407
3408 /* je/jz +1 */
3409 pbCodeBuf[off++] = 0x74;
3410 pbCodeBuf[off++] = 0x01;
3411
3412 /* int3 */
3413 pbCodeBuf[off++] = 0xcc;
3414 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3415 }
3416 else
3417 {
3418 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3419 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3420 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
3421 }
3422
3423# elif defined(RT_ARCH_ARM64)
3424 /* mov TMP0, [gstreg] */
3425 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3426
3427 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3428 /* sub tmp0, tmp0, idxReg */
3429 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3430 /* cbz tmp0, +1 */
3431 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3432 /* brk #0x1000+enmGstReg */
3433 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3434 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3435
3436# else
3437# error "Port me!"
3438# endif
3439 return off;
3440}
3441#endif /* VBOX_STRICT */
3442
3443
3444/**
3445 * Emits code for checking the return code of a call and rcPassUp, returning
3446 * from the code if either is non-zero.
3447 */
3448DECL_HIDDEN_THROW(uint32_t)
3449iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3450{
3451#ifdef RT_ARCH_AMD64
3452 /*
3453 * AMD64: eax = call status code.
3454 */
3455
3456 /* edx = rcPassUp */
3457 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3458# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3459 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3460# endif
3461
3462 /* edx = eax | rcPassUp */
3463 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3464 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3465 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3466 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3467
3468 /* Jump to non-zero status return path. */
3469 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3470
3471 /* done. */
3472
3473#elif RT_ARCH_ARM64
3474 /*
3475 * ARM64: w0 = call status code.
3476 */
3477 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3478 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3479
3480 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3481
3482 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3483
3484 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3485 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3486 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3487
3488#else
3489# error "port me"
3490#endif
3491 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3492 return off;
3493}
3494
3495
3496/**
3497 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3498 * raising a \#GP(0) if it isn't.
3499 *
3500 * @returns New code buffer offset; throws VBox status code on error.
3501 * @param pReNative The native recompile state.
3502 * @param off The code buffer offset.
3503 * @param idxAddrReg The host register with the address to check.
3504 * @param idxInstr The current instruction.
3505 */
3506DECL_HIDDEN_THROW(uint32_t)
3507iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3508{
3509 RT_NOREF(idxInstr);
3510
3511 /*
3512 * Make sure we don't have any outstanding guest register writes as we may
3513     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3514 */
3515 off = iemNativeRegFlushPendingWrites(pReNative, off);
3516
3517#ifdef RT_ARCH_AMD64
3518 /*
3519 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3520 * return raisexcpt();
3521     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3522 */
3523 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3524
3525 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3526 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3527 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3528 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3529
3530# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3531 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3532# else
3533 uint32_t const offFixup = off;
3534 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3535 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3536 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3537 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3538# endif
3539
3540 iemNativeRegFreeTmp(pReNative, iTmpReg);
3541
3542#elif defined(RT_ARCH_ARM64)
3543 /*
3544 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3545 * return raisexcpt();
3546 * ----
3547 * mov x1, 0x800000000000
3548 * add x1, x0, x1
3549 * cmp xzr, x1, lsr 48
3550 * and either:
3551 * b.ne .Lraisexcpt
3552 * or:
3553 * b.eq .Lnoexcept
3554 * movz x1, #instruction-number
3555 * b .Lraisexcpt
3556 * .Lnoexcept:
3557 */
3558 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3559
3560 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3561 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3562 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3563
3564# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3565 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3566# else
3567 uint32_t const offFixup = off;
3568 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3569 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3570 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3571 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3572# endif
3573
3574 iemNativeRegFreeTmp(pReNative, iTmpReg);
3575
3576#else
3577# error "Port me"
3578#endif
3579 return off;
3580}
3581
3582
3583/**
3584 * Emits code to check if the content of @a idxAddrReg is within the limit of
3585 * idxSegReg, raising a \#GP(0) if it isn't.
3586 *
3587 * @returns New code buffer offset; throws VBox status code on error.
3588 * @param pReNative The native recompile state.
3589 * @param off The code buffer offset.
3590 * @param idxAddrReg The host register (32-bit) with the address to
3591 * check.
3592 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3593 * against.
3594 * @param idxInstr The current instruction.
3595 */
3596DECL_HIDDEN_THROW(uint32_t)
3597iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3598 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3599{
3600 /*
3601 * Make sure we don't have any outstanding guest register writes as we may
3602     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3603 */
3604 off = iemNativeRegFlushPendingWrites(pReNative, off);
3605
3606 /** @todo implement expand down/whatnot checking */
3607 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3608
3609 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3610 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3611 kIemNativeGstRegUse_ForUpdate);
3612
3613 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3614
3615#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3616 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3617 RT_NOREF(idxInstr);
3618#else
3619 uint32_t const offFixup = off;
3620 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3621 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3622 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3623 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3624#endif
3625
3626 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3627 return off;
3628}
3629
3630
3631/**
3632 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
3633 *
3634 * @returns The flush mask.
3635 * @param fCImpl The IEM_CIMPL_F_XXX flags.
3636 * @param fGstShwFlush The starting flush mask.
3637 */
3638DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
3639{
3640 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
3641 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
3642 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
3643 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
3644 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
3645 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
3646 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
3647 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
3648 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
3649 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
3650 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
3651 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
3652 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3653 return fGstShwFlush;
3654}
3655
3656
3657/**
3658 * Emits a call to a CImpl function or something similar.
3659 */
3660static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
3661 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3662 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3663{
3664 /*
3665     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
3666 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
3667 */
3668 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
3669 fGstShwFlush
3670 | RT_BIT_64(kIemNativeGstReg_Pc)
3671 | RT_BIT_64(kIemNativeGstReg_EFlags));
3672 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3673
3674 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3675
3676 /*
3677 * Load the parameters.
3678 */
3679#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3680     /* Special-case the hidden VBOXSTRICTRC pointer. */
3681 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3682 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3683 if (cAddParams > 0)
3684 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3685 if (cAddParams > 1)
3686 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3687 if (cAddParams > 2)
3688 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3689 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3690
3691#else
3692 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3693 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3694 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3695 if (cAddParams > 0)
3696 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3697 if (cAddParams > 1)
3698 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3699 if (cAddParams > 2)
3700# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3701 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3702# else
3703 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3704# endif
3705#endif
3706
3707 /*
3708 * Make the call.
3709 */
3710 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3711
3712#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3713 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3714#endif
3715
3716 /*
3717 * Check the status code.
3718 */
3719 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3720}
3721
3722
3723/**
3724 * Emits a call to a threaded worker function.
3725 */
3726static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3727{
3728 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3729 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3730 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3731
3732#ifdef RT_ARCH_AMD64
3733 /* Load the parameters and emit the call. */
3734# ifdef RT_OS_WINDOWS
3735# ifndef VBOXSTRICTRC_STRICT_ENABLED
3736 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3737 if (cParams > 0)
3738 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3739 if (cParams > 1)
3740 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3741 if (cParams > 2)
3742 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3743# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3745 if (cParams > 0)
3746 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3747 if (cParams > 1)
3748 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3749 if (cParams > 2)
3750 {
3751 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3752 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3753 }
3754 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3755# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3756# else
3757 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3758 if (cParams > 0)
3759 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3760 if (cParams > 1)
3761 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3762 if (cParams > 2)
3763 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3764# endif
3765
3766 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3767
3768# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3769 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3770# endif
3771
3772#elif RT_ARCH_ARM64
3773 /*
3774 * ARM64:
3775 */
3776 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3777 if (cParams > 0)
3778 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3779 if (cParams > 1)
3780 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3781 if (cParams > 2)
3782 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3783
3784 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3785
3786#else
3787# error "port me"
3788#endif
3789
3790 /*
3791 * Check the status code.
3792 */
3793 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3794
3795 return off;
3796}
3797
3798
3799/**
3800 * Emits the code at the RaiseGP0 label.
3801 */
3802static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3803{
3804 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3805 if (idxLabel != UINT32_MAX)
3806 {
3807 iemNativeLabelDefine(pReNative, idxLabel, off);
3808
3809 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3810 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3811#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3812 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3813#endif
3814 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3815
3816 /* jump back to the return sequence. */
3817 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3818 }
3819 return off;
3820}
3821
3822
3823/**
3824 * Emits the code at the ReturnWithFlags label (returns
3825 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3826 */
3827static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3828{
3829 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3830 if (idxLabel != UINT32_MAX)
3831 {
3832 iemNativeLabelDefine(pReNative, idxLabel, off);
3833
3834 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3835
3836 /* jump back to the return sequence. */
3837 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3838 }
3839 return off;
3840}
3841
3842
3843/**
3844 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3845 */
3846static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3847{
3848 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3849 if (idxLabel != UINT32_MAX)
3850 {
3851 iemNativeLabelDefine(pReNative, idxLabel, off);
3852
3853 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3854
3855 /* jump back to the return sequence. */
3856 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3857 }
3858 return off;
3859}
3860
3861
3862/**
3863 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3864 */
3865static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3866{
3867 /*
3868 * Generate the rc + rcPassUp fiddling code if needed.
3869 */
3870 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3871 if (idxLabel != UINT32_MAX)
3872 {
3873 iemNativeLabelDefine(pReNative, idxLabel, off);
3874
3875 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3876#ifdef RT_ARCH_AMD64
3877# ifdef RT_OS_WINDOWS
3878# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3879 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3880# endif
3881 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3882 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3883# else
3884 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3885 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3886# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3887 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3888# endif
3889# endif
3890# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3891 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3892# endif
3893
3894#else
3895 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3896 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3897 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3898#endif
3899
3900 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3901 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3902 }
3903 return off;
3904}
3905
3906
3907/**
3908 * Emits a standard epilog.
3909 */
3910static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3911{
3912 *pidxReturnLabel = UINT32_MAX;
3913
3914 /*
3915 * Successful return, so clear the return register (eax, w0).
3916 */
3917     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3918
3919 /*
3920 * Define label for common return point.
3921 */
3922 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3923 *pidxReturnLabel = idxReturn;
3924
3925 /*
3926 * Restore registers and return.
3927 */
3928#ifdef RT_ARCH_AMD64
3929 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3930
3931     /* Reposition rsp at the r15 restore point. */
3932 pbCodeBuf[off++] = X86_OP_REX_W;
3933 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3934 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3935 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3936
3937 /* Pop non-volatile registers and return */
3938 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3939 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3940 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3941 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3942 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3943 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3944 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3945 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3946# ifdef RT_OS_WINDOWS
3947 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3948 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3949# endif
3950 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3951 pbCodeBuf[off++] = 0xc9; /* leave */
3952 pbCodeBuf[off++] = 0xc3; /* ret */
3953 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3954
3955#elif RT_ARCH_ARM64
3956 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3957
3958 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3959 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3960 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3961 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3962 IEMNATIVE_FRAME_VAR_SIZE / 8);
3963 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3964 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3965 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3966 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3967 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3968 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3969 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3970 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3971 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3972 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3973 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3974 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3975
3976 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3977 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3978 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3979 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3980
3981 /* retab / ret */
3982# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3983 if (1)
3984 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3985 else
3986# endif
3987 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3988
3989#else
3990# error "port me"
3991#endif
3992 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3993
3994 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3995}
3996
3997
3998/**
3999 * Emits a standard prolog.
4000 */
4001static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4002{
4003#ifdef RT_ARCH_AMD64
4004 /*
4005 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
4006 * reserving 64 bytes for stack variables plus 4 non-register argument
4007     * slots. Fixed register assignment: xBX = pVCpu;
4008 *
4009 * Since we always do the same register spilling, we can use the same
4010 * unwind description for all the code.
4011 */
4012 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
4013 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
4014 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
4015 pbCodeBuf[off++] = 0x8b;
4016 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
4017 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
4018 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
4019# ifdef RT_OS_WINDOWS
4020 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
4021 pbCodeBuf[off++] = 0x8b;
4022 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
4023 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
4024 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
4025# else
4026 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
4027 pbCodeBuf[off++] = 0x8b;
4028 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
4029# endif
4030 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
4031 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
4032 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
4033 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
4034 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
4035 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
4036 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
4037 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
4038
4039 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
4040 X86_GREG_xSP,
4041 IEMNATIVE_FRAME_ALIGN_SIZE
4042 + IEMNATIVE_FRAME_VAR_SIZE
4043 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
4044 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
4045 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
4046 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
4047 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
4048
4049#elif RT_ARCH_ARM64
4050 /*
4051 * We set up a stack frame exactly like on x86, only we have to push the
4052     * return address ourselves here. We save all non-volatile registers.
4053 */
4054 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
4055
4056 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
4057                       * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
4058                       * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
4059                       * in any way conditional, so just emitting this instruction now and hoping for the best... */
4060 /* pacibsp */
4061 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
4062# endif
4063
4064 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
4065 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
4066 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
4067 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
4068 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
4069 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
4070 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4071 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
4072 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4073 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
4074 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4075 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
4076 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4077 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
4078 /* Save the BP and LR (ret address) registers at the top of the frame. */
4079 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
4080 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
4081 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
4082 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
4083 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
4084 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
4085
4086 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
4087 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
4088
4089 /* mov r28, r0 */
4090 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
4091 /* mov r27, r1 */
4092 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
4093
4094#else
4095# error "port me"
4096#endif
4097 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4098 return off;
4099}
4100
4101
4102
4103
4104/*********************************************************************************************************************************
4105* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
4106*********************************************************************************************************************************/
4107
4108#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
4109 { \
4110 Assert(pReNative->Core.bmVars == 0); \
4111 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
4112 Assert(pReNative->Core.bmStack == 0); \
4113 pReNative->fMc = (a_fMcFlags); \
4114 pReNative->fCImpl = (a_fCImplFlags); \
4115 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
4116
4117/** We have to get to the end in recompilation mode, as otherwise we won't
4118 * generate code for all the IEM_MC_IF_XXX branches. */
4119#define IEM_MC_END() \
4120 iemNativeVarFreeAll(pReNative); \
4121 } return off
4122
4123
4124
4125/*********************************************************************************************************************************
4126* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
4127*********************************************************************************************************************************/
4128
4129#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
4130 pReNative->fMc = 0; \
4131 pReNative->fCImpl = (a_fFlags); \
4132 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
4133
4134
4135#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4136 pReNative->fMc = 0; \
4137 pReNative->fCImpl = (a_fFlags); \
4138 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
4139
4140DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4141 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4142 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
4143{
4144 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
4145}
4146
4147
4148#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4149 pReNative->fMc = 0; \
4150 pReNative->fCImpl = (a_fFlags); \
4151 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4152 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
4153
4154DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4155 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4156 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
4157{
4158 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
4159}
4160
4161
4162#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4163 pReNative->fMc = 0; \
4164 pReNative->fCImpl = (a_fFlags); \
4165 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4166 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
4167
4168DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4169 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4170 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
4171 uint64_t uArg2)
4172{
4173 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
4174}
4175
4176
4177
4178/*********************************************************************************************************************************
4179* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
4180*********************************************************************************************************************************/
4181
4182/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
4183 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
4184DECL_INLINE_THROW(uint32_t)
4185iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4186{
4187 /*
4188 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
4189 * return with a special status code and make the execution loop deal with
4190 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
4191 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
4192 * could continue w/o interruption, it probably will drop into the
4193 * debugger, so it's not worth the effort of trying to service it here and we
4194 * just lump it in with the handling of the others.
4195 *
4196 * To simplify the code and the register state management even more (wrt
4197 * immediate in AND operation), we always update the flags and skip the
4198 * extra check and its associated conditional jump.
4199 */
4200 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
4201 <= UINT32_MAX);
4202 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4203 kIemNativeGstRegUse_ForUpdate);
4204 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
4205 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
4206 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
4207 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
4208 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4209
4210 /* Free but don't flush the EFLAGS register. */
4211 iemNativeRegFreeTmp(pReNative, idxEflReg);
4212
4213 return off;
4214}
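
/* Roughly speaking, the sequence emitted above comes down to something like the
   following pseudo-assembly (eflreg being whatever host register the allocator
   hands back for the shadowed EFLAGS):

        test    eflreg, X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK
        jnz     ReturnWithFlags                         ; let the execution loop deal with it
        and     eflreg, ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW)
        mov     [pVCpu + cpum.GstCtx.eflags], eflreg    ; write the shadow copy back
*/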
4215
4216
4217#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4218 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4219
4220#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4221 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4222 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4223
4224/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4225DECL_INLINE_THROW(uint32_t)
4226iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4227{
4228 /* Allocate a temporary PC register. */
4229 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4230
4231 /* Perform the addition and store the result. */
4232 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4233 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4234
4235 /* Free but don't flush the PC register. */
4236 iemNativeRegFreeTmp(pReNative, idxPcReg);
4237
4238 return off;
4239}
4240
4241
4242#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4243 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4244
4245#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4246 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4247 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4248
4249/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4250DECL_INLINE_THROW(uint32_t)
4251iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4252{
4253 /* Allocate a temporary PC register. */
4254 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4255
4256 /* Perform the addition and store the result. */
4257 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4258 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4259
4260 /* Free but don't flush the PC register. */
4261 iemNativeRegFreeTmp(pReNative, idxPcReg);
4262
4263 return off;
4264}
4265
4266
4267#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4268 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4269
4270#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4271 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4272 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4273
4274/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4275DECL_INLINE_THROW(uint32_t)
4276iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4277{
4278 /* Allocate a temporary PC register. */
4279 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4280
4281 /* Perform the addition and store the result. */
4282 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4283 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4284 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4285
4286 /* Free but don't flush the PC register. */
4287 iemNativeRegFreeTmp(pReNative, idxPcReg);
4288
4289 return off;
4290}
4291
4292
4293
4294/*********************************************************************************************************************************
4295* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4296*********************************************************************************************************************************/
4297
4298#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4299 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4300 (a_enmEffOpSize), pCallEntry->idxInstr)
4301
4302#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4303 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4304 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4305
4306#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4307 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4308 IEMMODE_16BIT, pCallEntry->idxInstr)
4309
4310#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4311 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4312 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4313
4314#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4315 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4316 IEMMODE_64BIT, pCallEntry->idxInstr)
4317
4318#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4319 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4320 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4321
4322/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4323 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4324 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4325DECL_INLINE_THROW(uint32_t)
4326iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4327 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4328{
4329 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4330
4331 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4332 off = iemNativeRegFlushPendingWrites(pReNative, off);
4333
4334 /* Allocate a temporary PC register. */
4335 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4336
4337 /* Perform the addition. */
4338 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4339
4340 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4341 {
4342 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4343 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4344 }
4345 else
4346 {
4347 /* Just truncate the result to 16-bit IP. */
4348 Assert(enmEffOpSize == IEMMODE_16BIT);
4349 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4350 }
4351 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4352
4353 /* Free but don't flush the PC register. */
4354 iemNativeRegFreeTmp(pReNative, idxPcReg);
4355
4356 return off;
4357}
4358
4359
4360#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4361 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4362 (a_enmEffOpSize), pCallEntry->idxInstr)
4363
4364#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4365 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4366 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4367
4368#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4369 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4370 IEMMODE_16BIT, pCallEntry->idxInstr)
4371
4372#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4373 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4374 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4375
4376#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4377 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4378 IEMMODE_32BIT, pCallEntry->idxInstr)
4379
4380#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4381 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4382 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4383
4384/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4385 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4386 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4387DECL_INLINE_THROW(uint32_t)
4388iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4389 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4390{
4391 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4392
4393 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4394 off = iemNativeRegFlushPendingWrites(pReNative, off);
4395
4396 /* Allocate a temporary PC register. */
4397 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4398
4399 /* Perform the addition. */
4400 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4401
4402 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4403 if (enmEffOpSize == IEMMODE_16BIT)
4404 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4405
4406 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4407 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4408
4409 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4410
4411 /* Free but don't flush the PC register. */
4412 iemNativeRegFreeTmp(pReNative, idxPcReg);
4413
4414 return off;
4415}
4416
4417
4418#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4419 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4420
4421#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4422 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4423 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4424
4425#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4426 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4427
4428#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4429 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4430 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4431
4432#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4433 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4434
4435#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4436 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4437 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4438
4439/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4440DECL_INLINE_THROW(uint32_t)
4441iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4442 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4443{
4444 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4445 off = iemNativeRegFlushPendingWrites(pReNative, off);
4446
4447 /* Allocate a temporary PC register. */
4448 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4449
4450 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4451 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4452 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4453 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4454 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4455
4456 /* Free but don't flush the PC register. */
4457 iemNativeRegFreeTmp(pReNative, idxPcReg);
4458
4459 return off;
4460}
4461
4462
4463
4464/*********************************************************************************************************************************
4465* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4466*********************************************************************************************************************************/
4467
4468/**
4469 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4470 *
4471 * @returns Pointer to the condition stack entry.
4472 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED (longjmp) if nested too deeply.
4473 */
4474DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4475{
4476 uint32_t const idxStack = pReNative->cCondDepth;
4477 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4478
4479 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4480 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4481
4482 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4483 pEntry->fInElse = false;
4484 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4485 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4486
4487 return pEntry;
4488}
4489
4490
4491/**
4492 * Start of the if-block, snapshotting the register and variable state.
4493 */
4494DECL_INLINE_THROW(void)
4495iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4496{
4497 Assert(offIfBlock != UINT32_MAX);
4498 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4499 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4500 Assert(!pEntry->fInElse);
4501
4502 /* Define the start of the IF block if requested or for disassembly purposes. */
4503 if (idxLabelIf != UINT32_MAX)
4504 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4505#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4506 else
4507 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4508#else
4509 RT_NOREF(offIfBlock);
4510#endif
4511
4512 /* Copy the initial state so we can restore it in the 'else' block. */
4513 pEntry->InitialState = pReNative->Core;
4514}
4515
4516
4517#define IEM_MC_ELSE() } while (0); \
4518 off = iemNativeEmitElse(pReNative, off); \
4519 do {
4520
4521/** Emits code related to IEM_MC_ELSE. */
4522DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4523{
4524 /* Check sanity and get the conditional stack entry. */
4525 Assert(off != UINT32_MAX);
4526 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4527 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4528 Assert(!pEntry->fInElse);
4529
4530 /* Jump to the endif */
4531 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4532
4533 /* Define the else label and enter the else part of the condition. */
4534 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4535 pEntry->fInElse = true;
4536
4537 /* Snapshot the core state so we can do a merge at the endif and restore
4538 the snapshot we took at the start of the if-block. */
4539 pEntry->IfFinalState = pReNative->Core;
4540 pReNative->Core = pEntry->InitialState;
4541
4542 return off;
4543}
4544
4545
4546#define IEM_MC_ENDIF() } while (0); \
4547 off = iemNativeEmitEndIf(pReNative, off)
4548
4549/** Emits code related to IEM_MC_ENDIF. */
4550DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4551{
4552 /* Check sanity and get the conditional stack entry. */
4553 Assert(off != UINT32_MAX);
4554 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4555 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4556
4557 /*
4558 * Now we have to find common ground with the core state at the end of the
4559 * if-block (or the initial state if there is no else block). Use the lowest
4560 * common denominator and just drop anything that isn't the same in both states.
4561 */
4562 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4563 * which is why we're doing this at the end of the else-block.
4564 * But we'd need more info about future for that to be worth the effort. */
4565 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4566 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4567 {
4568 /* shadow guest stuff first. */
4569 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4570 if (fGstRegs)
4571 {
4572 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4573 do
4574 {
4575 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4576 fGstRegs &= ~RT_BIT_64(idxGstReg);
4577
4578 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4579 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4580 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4581 {
4582 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
4583 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4584 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4585 }
4586 } while (fGstRegs);
4587 }
4588 else
4589 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4590
4591 /* Check variables next. For now we must require them to be identical
4592 or stuff we can recreate. */
4593 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4594 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4595 if (fVars)
4596 {
4597 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4598 do
4599 {
4600 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4601 fVars &= ~RT_BIT_32(idxVar);
4602
4603 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4604 {
4605 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4606 continue;
4607 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4608 {
4609 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4610 if (idxHstReg != UINT8_MAX)
4611 {
4612 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4613 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4614 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4615 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4616 }
4617 continue;
4618 }
4619 }
4620 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4621 continue;
4622
4623 /* Irreconcilable, so drop it. */
4624 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4625 if (idxHstReg != UINT8_MAX)
4626 {
4627 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4628 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4629 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4630 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4631 }
4632 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4633 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4634 } while (fVars);
4635 }
4636
4637 /* Finally, check that the host register allocations match. */
4638 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4639 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4640 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4641 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4642 }
4643
4644 /*
4645 * Define the endif label and maybe the else one if we're still in the 'if' part.
4646 */
4647 if (!pEntry->fInElse)
4648 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4649 else
4650 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4651 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4652
4653 /* Pop the conditional stack. */
4654 pReNative->cCondDepth -= 1;
4655
4656 return off;
4657}
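
/* For orientation, these emitters back the conditional MC constructs roughly as
   they appear in the instruction bodies, e.g. (illustrative use only):

        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
            ...   // recompiled against the state snapshotted by iemNativeCondStartIfBlock
        } IEM_MC_ELSE() {
            ...   // recompiled against the restored initial state
        } IEM_MC_ENDIF();

   The IF macro pushes a condition stack entry with else/endif labels and emits
   the test + branch to the else label; ELSE jumps to endif and defines the else
   label; ENDIF reconciles the two register/variable states as done above. */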
4658
4659
4660#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4661 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4662 do {
4663
4664/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4665DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4666{
4667 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4668
4669 /* Get the eflags. */
4670 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4671 kIemNativeGstRegUse_ReadOnly);
4672
4673 /* Test and jump. */
4674 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4675
4676 /* Free but don't flush the EFlags register. */
4677 iemNativeRegFreeTmp(pReNative, idxEflReg);
4678
4679 /* Make a copy of the core state now as we start the if-block. */
4680 iemNativeCondStartIfBlock(pReNative, off);
4681
4682 return off;
4683}
4684
4685
4686#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4687 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4688 do {
4689
4690/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4691DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4692{
4693 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4694
4695 /* Get the eflags. */
4696 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4697 kIemNativeGstRegUse_ReadOnly);
4698
4699 /* Test and jump. */
4700 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4701
4702 /* Free but don't flush the EFlags register. */
4703 iemNativeRegFreeTmp(pReNative, idxEflReg);
4704
4705 /* Make a copy of the core state now as we start the if-block. */
4706 iemNativeCondStartIfBlock(pReNative, off);
4707
4708 return off;
4709}
4710
4711
4712#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4713 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4714 do {
4715
4716/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4717DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4718{
4719 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4720
4721 /* Get the eflags. */
4722 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4723 kIemNativeGstRegUse_ReadOnly);
4724
4725 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4726 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4727
4728 /* Test and jump. */
4729 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4730
4731 /* Free but don't flush the EFlags register. */
4732 iemNativeRegFreeTmp(pReNative, idxEflReg);
4733
4734 /* Make a copy of the core state now as we start the if-block. */
4735 iemNativeCondStartIfBlock(pReNative, off);
4736
4737 return off;
4738}
4739
4740
4741#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4742 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4743 do {
4744
4745/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4746DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4747{
4748 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4749
4750 /* Get the eflags. */
4751 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4752 kIemNativeGstRegUse_ReadOnly);
4753
4754 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4755 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4756
4757 /* Test and jump. */
4758 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4759
4760 /* Free but don't flush the EFlags register. */
4761 iemNativeRegFreeTmp(pReNative, idxEflReg);
4762
4763 /* Make a copy of the core state now as we start the if-block. */
4764 iemNativeCondStartIfBlock(pReNative, off);
4765
4766 return off;
4767}
4768
4769
4770#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4771 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4772 do {
4773
4774#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4775 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4776 do {
4777
4778/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4779DECL_INLINE_THROW(uint32_t)
4780iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4781 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4782{
4783 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4784
4785 /* Get the eflags. */
4786 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4787 kIemNativeGstRegUse_ReadOnly);
4788
4789 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4790 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4791
4792 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4793 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4794 Assert(iBitNo1 != iBitNo2);
4795
4796#ifdef RT_ARCH_AMD64
4797 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4798
4799 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4800 if (iBitNo1 > iBitNo2)
4801 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4802 else
4803 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4804 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4805
4806#elif defined(RT_ARCH_ARM64)
4807 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4808 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4809
4810 /* and tmpreg, eflreg, #1<<iBitNo1 */
4811 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4812
4813 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4814 if (iBitNo1 > iBitNo2)
4815 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4816 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4817 else
4818 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4819 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4820
4821 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4822
4823#else
4824# error "Port me"
4825#endif
4826
4827 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4828 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4829 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4830
4831 /* Free but don't flush the EFlags and tmp registers. */
4832 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4833 iemNativeRegFreeTmp(pReNative, idxEflReg);
4834
4835 /* Make a copy of the core state now as we start the if-block. */
4836 iemNativeCondStartIfBlock(pReNative, off);
4837
4838 return off;
4839}
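
/* A worked example of the bit fiddling above, assuming the usual signed
   condition test comparing X86_EFL_SF (bit 7) against X86_EFL_OF (bit 11):

        tmp  = efl & RT_BIT_32(7);      // isolate SF
        tmp <<= 11 - 7;                 // line SF up with OF
        tmp ^= efl;                     // bit 11 of tmp is now SF ^ OF

   Testing bit iBitNo2 of tmp thus tells us whether the two flags differ, and
   fInverted picks which outcome branches to the else label. */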
4840
4841
4842#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4843 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4844 do {
4845
4846#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4847 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4848 do {
4849
4850/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4851 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4852DECL_INLINE_THROW(uint32_t)
4853iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4854 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4855{
4856 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4857
4858 /* We need an if-block label for the non-inverted variant. */
4859 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4860 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4861
4862 /* Get the eflags. */
4863 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4864 kIemNativeGstRegUse_ReadOnly);
4865
4866 /* Translate the flag masks to bit numbers. */
4867 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4868 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4869
4870 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4871 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4872 Assert(iBitNo1 != iBitNo);
4873
4874 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4875 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4876 Assert(iBitNo2 != iBitNo);
4877 Assert(iBitNo2 != iBitNo1);
4878
4879#ifdef RT_ARCH_AMD64
4880 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4881#elif defined(RT_ARCH_ARM64)
4882 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4883#endif
4884
4885 /* Check for the lone bit first. */
4886 if (!fInverted)
4887 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4888 else
4889 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4890
4891 /* Then extract and compare the other two bits. */
4892#ifdef RT_ARCH_AMD64
4893 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4894 if (iBitNo1 > iBitNo2)
4895 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4896 else
4897 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4898 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4899
4900#elif defined(RT_ARCH_ARM64)
4901 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4902
4903 /* and tmpreg, eflreg, #1<<iBitNo1 */
4904 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4905
4906 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4907 if (iBitNo1 > iBitNo2)
4908 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4909 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4910 else
4911 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4912 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4913
4914 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4915
4916#else
4917# error "Port me"
4918#endif
4919
4920 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4921 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4922 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4923
4924 /* Free but don't flush the EFlags and tmp registers. */
4925 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4926 iemNativeRegFreeTmp(pReNative, idxEflReg);
4927
4928 /* Make a copy of the core state now as we start the if-block. */
4929 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4930
4931 return off;
4932}
4933
4934
4935#define IEM_MC_IF_CX_IS_NZ() \
4936 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4937 do {
4938
4939/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4940DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4941{
4942 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4943
4944 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
4945 kIemNativeGstRegUse_ReadOnly);
4946 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4947 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4948
4949 iemNativeCondStartIfBlock(pReNative, off);
4950 return off;
4951}
4952
4953
4954#define IEM_MC_IF_ECX_IS_NZ() \
4955 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4956 do {
4957
4958#define IEM_MC_IF_RCX_IS_NZ() \
4959 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4960 do {
4961
4962/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4963DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4964{
4965 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4966
4967 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
4968 kIemNativeGstRegUse_ReadOnly);
4969 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4970 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4971
4972 iemNativeCondStartIfBlock(pReNative, off);
4973 return off;
4974}
4975
4976
4977#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4978 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4979 do {
4980
4981#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4982 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4983 do {
4984
4985/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4986DECL_INLINE_THROW(uint32_t)
4987iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4988{
4989 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4990
4991 /* We have to load both RCX and EFLAGS before we can start branching,
4992 otherwise we'll end up in the else-block with an inconsistent
4993 register allocator state.
4994 Doing EFLAGS first as it's more likely to be loaded, right? */
4995 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4996 kIemNativeGstRegUse_ReadOnly);
4997 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
4998 kIemNativeGstRegUse_ReadOnly);
4999
5000 /** @todo we could reduce this to a single branch instruction by spending a
5001 * temporary register and some setnz stuff. Not sure if loops are
5002 * worth it. */
5003 /* Check CX. */
5004 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
5005
5006 /* Check the EFlags bit. */
5007 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5008 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5009 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5010 !fCheckIfSet /*fJmpIfSet*/);
5011
5012 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5013 iemNativeRegFreeTmp(pReNative, idxEflReg);
5014
5015 iemNativeCondStartIfBlock(pReNative, off);
5016 return off;
5017}
5018
5019
5020#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5021 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
5022 do {
5023
5024#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5025 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
5026 do {
5027
5028#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
5029 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
5030 do {
5031
5032#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
5033 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
5034 do {
5035
5036/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
5037 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
5038 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
5039 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
5040DECL_INLINE_THROW(uint32_t)
5041iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5042 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
5043{
5044 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
5045
5046 /* We have to load both RCX and EFLAGS before we can start branching,
5047 otherwise we'll end up in the else-block with an inconsistent
5048 register allocator state.
5049 Doing EFLAGS first as it's more likely to be loaded, right? */
5050 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5051 kIemNativeGstRegUse_ReadOnly);
5052 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
5053 kIemNativeGstRegUse_ReadOnly);
5054
5055 /** @todo we could reduce this to a single branch instruction by spending a
5056 * temporary register and some setnz stuff. Not sure if loops are
5057 * worth it. */
5058 /* Check RCX/ECX. */
5059 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
5060
5061 /* Check the EFlags bit. */
5062 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
5063 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
5064 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
5065 !fCheckIfSet /*fJmpIfSet*/);
5066
5067 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
5068 iemNativeRegFreeTmp(pReNative, idxEflReg);
5069
5070 iemNativeCondStartIfBlock(pReNative, off);
5071 return off;
5072}
5073
5074
5075
5076/*********************************************************************************************************************************
5077* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
5078*********************************************************************************************************************************/
5079/** Number of hidden arguments for CIMPL calls.
5080 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
5081#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5082# define IEM_CIMPL_HIDDEN_ARGS 3
5083#else
5084# define IEM_CIMPL_HIDDEN_ARGS 2
5085#endif
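
/* The hidden arguments are the ones the recompiler passes on its own when
   calling a C implementation function, i.e. the pVCpu and cbInstr parameters
   every such function takes, plus, on Windows/AMD64 with strict status codes
   enabled, the extra parameter the ABI uses for returning a VBOXSTRICTRC by
   hidden reference. */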
5086
5087#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
5088 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
5089
5090#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
5091 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
5092
5093#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
5094 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
5095
5096#define IEM_MC_LOCAL(a_Type, a_Name) \
5097 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
5098
5099#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
5100 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
5101
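/* Typical (purely illustrative) use inside an MC block:

        IEM_MC_ARG(uint64_t *,  pu64Dst, 0);
        IEM_MC_ARG(uint64_t,    u64Src,  1);
        IEM_MC_ARG(uint32_t *,  pEFlags, 2);
        IEM_MC_LOCAL(uint64_t,  u64Tmp);

   Each of these only reserves a variable index; the kind (stack, immediate,
   local/guest reference) and any host register or stack slot are assigned
   lazily by the helpers further down. */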
5102
5103/**
5104 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
5105 */
5106DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
5107{
5108 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
5109 return IEM_CIMPL_HIDDEN_ARGS;
5110 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
5111 return 1;
5112 return 0;
5113}
5114
5115
5116/**
5117 * Internal work that allocates a variable with kind set to
5118 * kIemNativeVarKind_Invalid and no current stack allocation.
5119 *
5120 * The kind will either be set by the caller or later when the variable is first
5121 * assigned a value.
5122 */
5123static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5124{
5125 Assert(cbType > 0 && cbType <= 64);
5126 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
5127 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
5128 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
5129 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5130 pReNative->Core.aVars[idxVar].cbVar = cbType;
5131 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5132 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5133 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
5134 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
5135 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
5136 pReNative->Core.aVars[idxVar].u.uValue = 0;
5137 return idxVar;
5138}
5139
5140
5141/**
5142 * Internal work that allocates an argument variable w/o setting enmKind.
5143 */
5144static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5145{
5146 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
5147 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5148 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
5149
5150 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5151 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
5152 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
5153 return idxVar;
5154}
5155
5156
5157/**
5158 * Changes the variable to a stack variable.
5159 *
5160 * Currently this is only possible to do the first time the variable is used;
5161 * switching later could be implemented but hasn't been done.
5162 *
5163 * @param pReNative The recompiler state.
5164 * @param idxVar The variable.
5165 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5166 */
5167static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5168{
5169 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5170 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5171 {
5172 /* We could in theory transition from immediate to stack as well, but it
5173 would involve the caller doing work storing the value on the stack. So,
5174 till that's required we only allow transition from invalid. */
5175 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5176 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5177 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5178 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
5179
5180 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
5181 {
5182 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
5183 {
5184 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
5185 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5186 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
5187 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5188 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
5189 return;
5190 }
5191 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;*/
5192 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
5193 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
5194 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
5195 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
5196 uint32_t bmStack = ~pReNative->Core.bmStack;
5197 while (bmStack != UINT32_MAX)
5198 {
5199 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
5200 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5201 if (!(iSlot & fBitAlignMask))
5202 {
5203 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
5204 {
5205 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
5206 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5207 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
5208 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
5209 return;
5210 }
5211 }
5212 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
5213 }
5214 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5215 }
5216 }
5217}
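
/* Worked example for the multi-slot path above, with 8-byte stack slots and a
   32 byte (256-bit) variable:

        fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = 3
        fBitAllocMask = RT_BIT_32((32 + 7) >> 3)          - 1 = 0xf

   i.e. four consecutive slots starting at a slot index that is a multiple of
   four, which is what the loop over ~bmStack goes looking for. */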
5218
5219
5220/**
5221 * Sets the variable to a constant value.
5222 *
5223 * This does not require stack storage as we know the value and can always
5224 * reload it, unless of course it's referenced.
5225 *
5226 * @param pReNative The recompiler state.
5227 * @param idxVar The variable.
5228 * @param uValue The immediate value.
5229 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5230 */
5231static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5232{
5233 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5234 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5235 {
5236 /* Only simple transitions for now. */
5237 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5238 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5239 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5240 }
5241 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5242
5243 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5244}
5245
5246
5247/**
5248 * Sets the variable to a reference (pointer) to @a idxOtherVar.
5249 *
5250 * This does not require stack storage as we know the value and can always
5251 * reload it. Loading is postponed till needed.
5252 *
5253 * @param pReNative The recompiler state.
5254 * @param idxVar The variable.
5255 * @param idxOtherVar The variable to take the (stack) address of.
5256 *
5257 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5258 */
5259static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5260{
5261 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5262 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5263
5264 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5265 {
5266 /* Only simple transitions for now. */
5267 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5268 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5269 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5270 }
5271 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5272
5273 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5274
5275 /* Update the other variable, ensure it's a stack variable. */
5276 /** @todo handle variables with const values... that'll go boom now. */
5277 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5278 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5279}
5280
5281
5282/**
5283 * Sets the variable to a reference (pointer) to a guest register reference.
5284 *
5285 * This does not require stack storage as we know the value and can always
5286 * reload it. Loading is postponed till needed.
5287 *
5288 * @param pReNative The recompiler state.
5289 * @param idxVar The variable.
5290 * @param enmRegClass The class of guest registers to reference.
5291 * @param idxReg The register within @a enmRegClass to reference.
5292 *
5293 * @throws VERR_IEM_VAR_IPE_2
5294 */
5295static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
5296 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
5297{
5298 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5299
5300 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
5301 {
5302 /* Only simple transitions for now. */
5303 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5304 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5305 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
5306 }
5307 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5308
5309 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
5310 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
5311}
5312
5313
5314DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5315{
5316 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5317}
5318
5319
5320DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5321{
5322 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5323 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5324 return idxVar;
5325}
5326
5327
5328DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5329{
5330 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5331 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5332 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5333 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5334
5335 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5336 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5337 return idxArgVar;
5338}
5339
5340
5341DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5342{
5343 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5344 iemNativeVarSetKindToStack(pReNative, idxVar);
5345 return idxVar;
5346}
5347
5348
5349DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5350{
5351 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5352 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5353 return idxVar;
5354}
5355
5356
5357/**
5358 * Makes sure variable @a idxVar has a register assigned to it.
5359 *
5360 * @returns The host register number.
5361 * @param pReNative The recompiler state.
5362 * @param idxVar The variable.
5363 * @param poff Pointer to the instruction buffer offset.
5364 * In case a register needs to be freed up.
5365 */
5366DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5367{
5368 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5369 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
5370
5371 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5372 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5373 {
5374 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
5375 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5376 return idxReg;
5377 }
5378
5379 /*
5380 * If the kind of variable has not yet been set, default to 'stack'.
5381 */
5382 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
5383 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5384 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
5385 iemNativeVarSetKindToStack(pReNative, idxVar);
5386
5387 /*
5388 * We have to allocate a register for the variable, even if it's a stack one,
5389 * as we don't know whether there are modifications being made to it before
5390 * it's finalized (todo: analyze and insert hints about that?).
5391 *
5392 * If we can, we try to get the correct register for argument variables. This
5393 * is assuming that most argument variables are fetched as close as possible
5394 * to the actual call, so that there aren't any interfering hidden calls
5395 * (memory accesses, etc) in between.
5396 *
5397 * If we cannot, or it's a local variable rather than an argument, we make
5398 * sure no argument registers that will be used by this MC block get
5399 * allocated here, and we always prefer non-volatile registers to avoid
5400 * having to spill stuff for internal calls.
5401 */
5402 /** @todo Detect too early argument value fetches and warn about hidden
5403 * calls causing less optimal code to be generated in the python script. */
5404
5405 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5406 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5407 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5408 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5409 else
5410 {
5411 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5412 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5413 & ~pReNative->Core.bmHstRegsWithGstShadow
5414 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5415 & fNotArgsMask;
5416 if (fRegs)
5417 {
5418 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
5419 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5420 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5421 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5422 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5423 }
5424 else
5425 {
5426 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5427 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5428 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5429 }
5430 }
5431 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5432 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5433 return idxReg;
5434}
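
/* In short, the selection order above is: 1) the call argument register that
   matches uArgNo, if it is still free; 2) any free register that is neither
   fixed, shadowing a guest register, nor an argument register this MC block
   will need, preferring the non-volatile ones at the top; 3) failing that,
   let iemNativeRegAllocFindFree evict/spill something. */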
5435
5436
5437/**
5438 * The value of variable @a idxVar will be written in full to the @a enmGstReg
5439 * guest register.
5440 *
5441 * This function makes sure there is a register for it and sets it to be the
5442 * current shadow copy of @a enmGstReg.
5443 *
5444 * @returns The host register number.
5445 * @param pReNative The recompiler state.
5446 * @param idxVar The variable.
5447 * @param enmGstReg The guest register this variable will be written to
5448 * after this call.
5449 * @param poff Pointer to the instruction buffer offset.
5450 * In case a register needs to be freed up or if the
5451 * variable content needs to be loaded off the stack.
5452 *
5453 * @note We DO NOT expect @a idxVar to be an argument variable,
5454 * because this function can only be used in the commit stage of an
5455 * instruction.
5456 */
5457DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegisterForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
5458 IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
5459{
5460 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5461 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
5462 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
5463 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
5464 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
5465 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
5466 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
5467
5468 /*
5469 * This shouldn't ever be used for arguments, unless it's in a weird else
5470 * branch that doesn't do any calling and even then it's questionable.
5471 *
5472 * However, in case someone writes crazy wrong MC code and does register
5473 * updates before making calls, just use the regular register allocator to
5474 * ensure we get a register suitable for the intended argument number.
5475 */
5476 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarAllocRegister(pReNative, idxVar, poff));
5477
5478 /*
5479 * If there is already a register for the variable, we transfer/set the
5480 * guest shadow copy assignment to it.
5481 */
5482 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5483 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5484 {
5485 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
5486 {
5487 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
5488 iemNativeRegTransferGstRegShadowing(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], idxReg, enmGstReg, *poff);
5489 Log12(("iemNativeVarAllocRegisterForGuestReg: Moved %s for guest %s into %s for full write\n",
5490 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
5491 }
5492 else
5493 {
5494 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
5495 Log12(("iemNativeVarAllocRegisterForGuestReg: Marking %s as copy of guest %s (full write)\n",
5496 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
5497 }
5498 /** @todo figure this one out. We need some way of making sure the register isn't
5499 * modified after this point, just in case we start writing crappy MC code. */
5500 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
5501 return idxReg;
5502 }
5503 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
5504
5505 /*
5506 * Because this is supposed to be the commit stage, we just tag along with the
5507 * temporary register allocator and upgrade the register to a variable register.
5508 */
5509 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
5510 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
5511 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
5512 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
5513 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
5514 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5515
5516 /*
5517 * Now we need to load the register value.
5518 */
5519 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
5520 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
5521 else
5522 {
5523 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5524 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_7));
5525 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
5526 switch (pReNative->Core.aVars[idxVar].cbVar)
5527 {
5528 case sizeof(uint64_t):
5529 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
5530 break;
5531 case sizeof(uint32_t):
5532 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
5533 break;
5534 case sizeof(uint16_t):
5535 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
5536 break;
5537 case sizeof(uint8_t):
5538 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
5539 break;
5540 default:
5541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
5542 }
5543 }
5544
5545 return idxReg;
5546}
5547
5548
5549/**
5550 * Sets the host register for @a idxVar to @a idxReg.
5551 *
5552 * The register must not be allocated. Any guest register shadowing will be
5553 * implicitly dropped by this call.
5554 *
5555 * The variable must not have any register associated with it (causes
5556 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
5557 * implied.
5558 *
5559 * @param pReNative The recompiler state.
5560 * @param idxVar The variable.
5561 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
5562 * @param off For recording in debug info.
5563 *
5564 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
5565 */
5566DECL_INLINE_THROW(void) iemNativeVarSetRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
5567{
5568 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5569 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5570 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
5571 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
5572
5573 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
5574 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5575
5576 iemNativeVarSetKindToStack(pReNative, idxVar);
5577 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5578}
5579
5580
5581/**
5582 * Worker that frees the stack slots for variable @a idxVar if any allocated.
5583 *
5584 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
5585 */
5586DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5587{
5588 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5589 Assert(idxStackSlot == UINT8_MAX || idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
5590 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
5591 {
5592 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
5593 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
5594 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
5595 Assert(cSlots > 0);
5596 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
5597 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
5598 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
5599 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5600 }
5601}
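
/*
 * Standalone sketch of the slot bookkeeping in the worker above: a variable of
 * cbVar bytes occupies cSlots 8-byte stack slots, and freeing it clears a
 * contiguous run of bits in the allocation bitmap.  The example values below
 * (cbVar, idxStackSlot, bmStack) are made up for illustration.
 */
#if 0 /* illustration only, not part of the build */
# include <stdint.h>
# include <assert.h>

int main(void)
{
    unsigned const cbVar        = 16;                                              /* e.g. a 128-bit variable */
    unsigned const idxStackSlot = 3;                                               /* hypothetical first slot */
    unsigned const cSlots       = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t); /* -> 2 */
    uint32_t const fAllocMask   = (UINT32_C(1) << cSlots) - 1U;                      /* -> 0x3 */
    uint32_t       bmStack      = UINT32_C(0x00000078);                            /* slots 3..6 currently allocated */

    assert(((bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);                /* both slots really are allocated */
    bmStack &= ~(fAllocMask << idxStackSlot);                                      /* free slots 3 and 4 */
    assert(bmStack == UINT32_C(0x00000060));                                       /* slots 5 and 6 remain allocated */
    return 0;
}
#endif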
5602
5603
5604/**
5605 * Worker that frees a single variable.
5606 *
5607 * ASSUMES that @a idxVar is valid.
5608 */
5609DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5610{
5611 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
5612 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5613
5614 /* Free the host register first if any assigned. */
5615 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5616 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5617 {
5618 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5619 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5620 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5621 }
5622
5623 /* Free argument mapping. */
5624 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5625 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
5626 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
5627
5628 /* Free the stack slots. */
5629 iemNativeVarFreeStackSlots(pReNative, idxVar);
5630
5631 /* Free the actual variable. */
5632 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5633 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5634}
5635
5636
5637/**
5638 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
5639 */
5640DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
5641{
5642 while (bmVars != 0)
5643 {
5644 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
5645 bmVars &= ~RT_BIT_32(idxVar);
5646
5647#if 1 /** @todo optimize by simplifying this later... */
5648 iemNativeVarFreeOneWorker(pReNative, idxVar);
5649#else
5650 /* Only need to free the host register, the rest is done as bulk updates below. */
5651 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5652 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5653 {
5654 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5655 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5656 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5657 }
5658#endif
5659 }
5660#if 0 /** @todo optimize by simplifying this later... */
5661 pReNative->Core.bmVars = 0;
5662 pReNative->Core.bmStack = 0;
5663 pReNative->Core.u64ArgVars = UINT64_MAX;
5664#endif
5665}
5666
5667
5668/**
5669 * This is called by IEM_MC_END() to clean up all variables.
5670 */
5671DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
5672{
5673 uint32_t const bmVars = pReNative->Core.bmVars;
5674 if (bmVars != 0)
5675 iemNativeVarFreeAllSlow(pReNative, bmVars);
5676 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5677 Assert(pReNative->Core.bmStack == 0);
5678}
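
/*
 * Standalone sketch of the bitmap walk used by iemNativeVarFreeAllSlow above:
 * repeatedly take the lowest set bit, clear it and process that variable
 * index.  The bitmap value is made up for illustration; the real code uses
 * ASMBitFirstSetU32 on pReNative->Core.bmVars.
 */
#if 0 /* illustration only, not part of the build */
# include <stdint.h>
# include <stdio.h>

int main(void)
{
    uint32_t bmVars = UINT32_C(0x00000115);     /* hypothetical: variables 0, 2, 4 and 8 are live */
    while (bmVars != 0)
    {
        unsigned idxVar = 0;
        while (!(bmVars & (UINT32_C(1) << idxVar)))
            idxVar++;                           /* lowest set bit, like ASMBitFirstSetU32(bmVars) - 1 */
        bmVars &= ~(UINT32_C(1) << idxVar);
        printf("freeing variable %u\n", idxVar);
    }
    return 0;
}
#endif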
5679
5680
5681#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
5682
5683/**
5684 * This is called by IEM_MC_FREE_LOCAL.
5685 */
5686DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5687{
5688 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5689 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
5690 iemNativeVarFreeOneWorker(pReNative, idxVar);
5691}
5692
5693
5694#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
5695
5696/**
5697 * This is called by IEM_MC_FREE_ARG.
5698 */
5699DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5700{
5701 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5702 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
5703 iemNativeVarFreeOneWorker(pReNative, idxVar);
5704}
5705
5706
5707
5708/*********************************************************************************************************************************
5709* Emitters for IEM_MC_CALL_CIMPL_XXX *
5710*********************************************************************************************************************************/
5711
5712/**
5713 * Emits code to load a reference to the given guest register into @a idxGprDst.
5714 */
5715DECL_INLINE_THROW(uint32_t)
5716iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5717 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5718{
5719 /*
5720 * Get the offset relative to the CPUMCTX structure.
5721 */
5722 uint32_t offCpumCtx;
5723 switch (enmClass)
5724 {
5725 case kIemNativeGstRegRef_Gpr:
5726 Assert(idxRegInClass < 16);
5727 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5728 break;
5729
5730 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
5731 Assert(idxRegInClass < 4);
5732 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5733 break;
5734
5735 case kIemNativeGstRegRef_EFlags:
5736 Assert(idxRegInClass == 0);
5737 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5738 break;
5739
5740 case kIemNativeGstRegRef_MxCsr:
5741 Assert(idxRegInClass == 0);
5742 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5743 break;
5744
5745 case kIemNativeGstRegRef_FpuReg:
5746 Assert(idxRegInClass < 8);
5747 AssertFailed(); /** @todo what kind of indexing? */
5748 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5749 break;
5750
5751 case kIemNativeGstRegRef_MReg:
5752 Assert(idxRegInClass < 8);
5753 AssertFailed(); /** @todo what kind of indexing? */
5754 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5755 break;
5756
5757 case kIemNativeGstRegRef_XReg:
5758 Assert(idxRegInClass < 16);
5759 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5760 break;
5761
5762 default:
5763 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5764 }
5765
5766 /*
5767 * Load the value into the destination register.
5768 */
5769#ifdef RT_ARCH_AMD64
5770 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5771
5772#elif defined(RT_ARCH_ARM64)
5773 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5774 Assert(offCpumCtx < 4096);
5775 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5776
5777#else
5778# error "Port me!"
5779#endif
5780
5781 return off;
5782}
5783
5784
5785/**
5786 * Common code for CIMPL and AIMPL calls.
5787 *
5788 * These are calls that use argument variables and such. They should not be
5789 * confused with internal calls required to implement an MC operation,
5790 * like a TLB load and similar.
5791 *
5792 * Upon return all that is left to do is to load any hidden arguments and
5793 * perform the call. All argument variables are freed.
5794 *
5795 * @returns New code buffer offset; throws VBox status code on error.
5796 * @param pReNative The native recompile state.
5797 * @param off The code buffer offset.
5798 * @param cArgs The total number of arguments (includes hidden
5799 * count).
5800 * @param cHiddenArgs The number of hidden arguments. The hidden
5801 * arguments must not have any variable declared for
5802 * them, whereas all the regular arguments must
5803 * (tstIEMCheckMc ensures this).
5804 */
5805DECL_HIDDEN_THROW(uint32_t)
5806iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5807{
5808#ifdef VBOX_STRICT
5809 /*
5810 * Assert sanity.
5811 */
5812 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5813 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5814 for (unsigned i = 0; i < cHiddenArgs; i++)
5815 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5816 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5817 {
5818 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5819 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5820 }
5821#endif
5822
5823 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
5824
5825 /*
5826 * First, go over the host registers that will be used for arguments and make
5827 * sure they either hold the desired argument or are free.
5828 */
5829 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
5830 for (uint32_t i = 0; i < cRegArgs; i++)
5831 {
5832 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5833 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5834 {
5835 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
5836 {
5837 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
5838 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5839 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
5840 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5841 if (uArgNo == i)
5842 { /* perfect */ }
5843 else
5844 {
5845 /* The variable allocator logic should make sure this is impossible. */
5846 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
5847
5848 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5849 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5850 else
5851 {
5852 /* just free it, can be reloaded if used again */
5853 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5854 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
5855 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
5856 }
5857 }
5858 }
5859 else
5860 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
5861 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
5862 }
5863 }
5864
5865 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
5866
5867#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
5868 /*
5869 * If there are any stack arguments, make sure they are in their place as well.
5870 *
5871 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
5872 * the caller) will be loading it later and it must be free (see the first loop).
5873 */
5874 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
5875 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
5876 {
5877 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5878 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
5879 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5880 {
5881 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
5882 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
5883 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
5884 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5885 }
5886 else
5887 {
5888 /* Use ARG0 as temp for stuff we need registers for. */
5889 switch (pReNative->Core.aVars[idxVar].enmKind)
5890 {
5891 case kIemNativeVarKind_Stack:
5892 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5893 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5894 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
5895 iemNativeVarCalcBpDisp(pReNative, idxVar));
5896 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5897 continue;
5898
5899 case kIemNativeVarKind_Immediate:
5900 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
5901 continue;
5902
5903 case kIemNativeVarKind_VarRef:
5904 {
5905 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5906 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5907 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5908 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5909 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5910 iemNativeStackCalcBpDisp(pReNative->Core.aVars[idxOtherVar].idxStackSlot));
5911 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5912 continue;
5913 }
5914
5915 case kIemNativeVarKind_GstRegRef:
5916 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5917 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5918 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5919 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5920 continue;
5921
5922 case kIemNativeVarKind_Invalid:
5923 case kIemNativeVarKind_End:
5924 break;
5925 }
5926 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5927 }
5928 }
5929#else
5930 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
5931#endif
5932
5933 /*
5934 * Make sure the argument variables are loaded into their respective registers.
5935 *
5936 * We can optimize this by ASSUMING that any register allocations are for
5937 * registers that have already been loaded and are ready. The previous step
5938 * saw to that.
5939 */
5940 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
5941 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5942 {
5943 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5944 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5945 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
5946 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
5947 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
5948 else
5949 {
5950 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5951 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5952 {
5953 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5954 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
5955 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
5956 | RT_BIT_32(idxArgReg);
5957 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
5958 }
5959 else
5960 {
5961 /* Use ARG0 as temp for stuff we need registers for. */
5962 switch (pReNative->Core.aVars[idxVar].enmKind)
5963 {
5964 case kIemNativeVarKind_Stack:
5965 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5966 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5967 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeVarCalcBpDisp(pReNative, idxVar));
5968 continue;
5969
5970 case kIemNativeVarKind_Immediate:
5971 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
5972 continue;
5973
5974 case kIemNativeVarKind_VarRef:
5975 {
5976 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5977 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5978 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5979 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5980 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
5981 iemNativeStackCalcBpDisp(pReNative->Core.aVars[idxOtherVar].idxStackSlot));
5982 continue;
5983 }
5984
5985 case kIemNativeVarKind_GstRegRef:
5986 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
5987 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5988 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5989 continue;
5990
5991 case kIemNativeVarKind_Invalid:
5992 case kIemNativeVarKind_End:
5993 break;
5994 }
5995 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5996 }
5997 }
5998 }
5999#ifdef VBOX_STRICT
6000 else
6001 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
6002 {
6003 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
6004 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
6005 }
6006#endif
6007
6008 /*
6009 * Free all argument variables (simplified).
6010 * Their lifetime always expires with the call they are for.
6011 */
6012 /** @todo Make the python script check that arguments aren't used after
6013 * IEM_MC_CALL_XXXX. */
6014 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
6015 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
6016 * an argument value. There is also some FPU stuff. */
6017 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
6018 {
6019 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
6020 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
6021
6022 /* no need to free registers: */
6023 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
6024 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
6025 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
6026 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
6027 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
6028 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
6029
6030 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
6031 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6032 iemNativeVarFreeStackSlots(pReNative, idxVar);
6033 }
6034 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
6035
6036 /*
6037 * Flush volatile registers as we make the call.
6038 */
6039 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
6040
6041 return off;
6042}
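
/*
 * Simplified, self-contained sketch of how iemNativeEmitCallCommon above
 * distributes arguments: hidden arguments are left for the caller, the next
 * ones go into the call argument registers, and any overflow goes onto the
 * stack (only relevant when IEMNATIVE_FP_OFF_STACK_ARG0 is defined).  The
 * counts below are hypothetical and picked purely for illustration.
 */
#if 0 /* illustration only, not part of the build */
# include <stdio.h>

int main(void)
{
    unsigned const cArgGRegs   = 4; /* stand-in for IEMNATIVE_CALL_ARG_GREG_COUNT */
    unsigned const cHiddenArgs = 2; /* e.g. pVCpu and cbInstr for CIMPL calls */
    unsigned const cArgs       = 6; /* total count, hidden arguments included */
    for (unsigned i = 0; i < cArgs; i++)
    {
        if (i < cHiddenArgs)
            printf("arg %u: hidden - loaded by the caller into call register %u\n", i, i);
        else if (i < cArgGRegs)
            printf("arg %u: variable loaded into call register %u\n", i, i);
        else
            printf("arg %u: variable stored to stack argument slot %u\n", i, i - cArgGRegs);
    }
    return 0;
}
#endif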
6043
6044
6045/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
6046DECL_HIDDEN_THROW(uint32_t)
6047iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
6048 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
6049
6050{
6051 /*
6052 * Do all the call setup and cleanup.
6053 */
6054 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
6055
6056 /*
6057 * Load the two or three hidden arguments.
6058 */
6059#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6060 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6061 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6062 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
6063#else
6064 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6065 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
6066#endif
6067
6068 /*
6069 * Make the call and check the return code.
6070 *
6071 * Shadow PC copies are always flushed here, other stuff depends on flags.
6072 * Segment and general purpose registers are explicitly flushed via the
6073 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
6074 * macros.
6075 */
6076 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
6077#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
6078 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6079#endif
6080 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
6081 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
6082 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6083 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6084
6085 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6086}
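
/*
 * Conceptual, self-contained sketch of the call shape produced above for a
 * CIMPL with one MC-level argument.  The DEMO types and worker below are made
 * up; only the argument order - pVCpu and cbInstr as hidden arguments first,
 * then the MC arguments - mirrors the hidden-argument loading done in
 * iemNativeEmitCallCImplCommon (non-strict-RC path).
 */
#if 0 /* illustration only, not part of the build */
# include <stdint.h>
# include <stdio.h>

typedef struct DEMOVMCPU { int iDummy; } DEMOVMCPU;  /* hypothetical stand-in for the per-VCPU state */
typedef int (*PFNDEMOCIMPL)(DEMOVMCPU *pVCpu, uint8_t cbInstr, uint64_t uArg0);

static int demoCImplWorker(DEMOVMCPU *pVCpu, uint8_t cbInstr, uint64_t uArg0)
{
    (void)pVCpu;
    printf("cbInstr=%u uArg0=%#llx\n", cbInstr, (unsigned long long)uArg0);
    return 0;
}

int main(void)
{
    DEMOVMCPU    VCpu     = { 0 };
    PFNDEMOCIMPL pfnCImpl = demoCImplWorker;
    return pfnCImpl(&VCpu, 2 /*cbInstr*/, 0x42 /*a0*/); /* hidden arguments first, then the MC argument */
}
#endif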
6087
6088
6089#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6090 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
6091
6092/** Emits code for IEM_MC_CALL_CIMPL_1. */
6093DECL_INLINE_THROW(uint32_t)
6094iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6095 uintptr_t pfnCImpl, uint8_t idxArg0)
6096{
6097 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6098 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
6099}
6100
6101
6102#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6103 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
6104
6105/** Emits code for IEM_MC_CALL_CIMPL_2. */
6106DECL_INLINE_THROW(uint32_t)
6107iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6108 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
6109{
6110 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6111 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6112 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
6113}
6114
6115
6116#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6117 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6118 (uintptr_t)a_pfnCImpl, a0, a1, a2)
6119
6120/** Emits code for IEM_MC_CALL_CIMPL_3. */
6121DECL_INLINE_THROW(uint32_t)
6122iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6123 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
6124{
6125 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6126 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6127 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6128 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
6129}
6130
6131
6132#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
6133 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6134 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
6135
6136/** Emits code for IEM_MC_CALL_CIMPL_4. */
6137DECL_INLINE_THROW(uint32_t)
6138iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6139 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
6140{
6141 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6142 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6143 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6144 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
6145 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
6146}
6147
6148
6149#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
6150 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
6151 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
6152
6153/** Emits code for IEM_MC_CALL_CIMPL_5. */
6154DECL_INLINE_THROW(uint32_t)
6155iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
6156 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
6157{
6158 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
6159 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
6160 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
6161 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
6162 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
6163 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
6164}
6165
6166
6167/** Recompiler debugging: Flush guest register shadow copies. */
6168#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
6169
6170
6171
6172/*********************************************************************************************************************************
6173* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
6174*********************************************************************************************************************************/
6175
6176/**
6177 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
6178 */
6179DECL_INLINE_THROW(uint32_t)
6180iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6181 uintptr_t pfnAImpl, uint8_t cArgs)
6182{
6183 if (idxVarRc != UINT8_MAX)
6184 {
6185 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
6186 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
6187 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
6188 }
6189
6190 /*
6191 * Do all the call setup and cleanup.
6192 */
6193 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
6194
6195 /*
6196 * Make the call and update the return code variable if we've got one.
6197 */
6198 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
6199 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
6200 {
6201 pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
6202 iemNativeVarSetRegister(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
6203 }
6204
6205 return off;
6206}
6207
6208
6209
6210#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
6211 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
6212
6213#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
6214 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
6215
6216/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
6217DECL_INLINE_THROW(uint32_t)
6218iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
6219{
6220 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
6221}
6222
6223
6224#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
6225 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
6226
6227#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
6228 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
6229
6230/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
6231DECL_INLINE_THROW(uint32_t)
6232iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
6233{
6234 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6235 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
6236}
6237
6238
6239#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
6240 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
6241
6242#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
6243 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
6244
6245/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
6246DECL_INLINE_THROW(uint32_t)
6247iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6248 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
6249{
6250 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6251 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6252 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
6253}
6254
6255
6256#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
6257 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
6258
6259#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
6260 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
6261
6262/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
6263DECL_INLINE_THROW(uint32_t)
6264iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6265 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
6266{
6267 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6268 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6269 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
6270 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
6271}
6272
6273
6274#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
6275 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
6276
6277#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
6278 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
6279
6280/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
6281DECL_INLINE_THROW(uint32_t)
6282iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
6283 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
6284{
6285 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
6286 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
6287 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
6288 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
6289 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
6290}
6291
6292
6293
6294/*********************************************************************************************************************************
6295* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
6296*********************************************************************************************************************************/
6297
6298#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
6299 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx)
6300
6301/** Emits code for IEM_MC_FETCH_GREG_U8. */
6302DECL_INLINE_THROW(uint32_t)
6303iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx)
6304{
6305 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6306 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint8_t));
6307 Assert(iGRegEx < 20);
6308
6309 /* Same discussion as in iemNativeEmitFetchGregU16 */
6310 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
6311 kIemNativeGstRegUse_ReadOnly);
6312
6313 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6314 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6315
6316 if (iGRegEx < 16)
6317 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
6318 else
6319 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
6320
6321 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6322 return off;
6323}
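
/*
 * Standalone sketch of the extended 8-bit register index handled above:
 * 0..15 select the low byte of a GPR, 16..19 select AH/CH/DH/BH, i.e.
 * bits 15:8 of GPRs 0..3.  The array and helper below are made up for
 * illustration; the real code emits register-to-register moves instead.
 */
#if 0 /* illustration only, not part of the build */
# include <stdint.h>
# include <assert.h>

static uint8_t demoFetchGReg8(uint64_t const *pauGRegs /* 16 entries */, uint8_t iGRegEx)
{
    assert(iGRegEx < 20);
    if (iGRegEx < 16)
        return (uint8_t)pauGRegs[iGRegEx];              /* AL, CL, DL, BL, SPL, ..., R15L */
    return (uint8_t)(pauGRegs[iGRegEx & 15] >> 8);      /* AH, CH, DH, BH */
}

int main(void)
{
    uint64_t aGRegs[16] = { UINT64_C(0x1122334455667788) /* xAX */ };
    assert(demoFetchGReg8(aGRegs, 0)  == 0x88);          /* AL */
    assert(demoFetchGReg8(aGRegs, 16) == 0x77);          /* AH */
    return 0;
}
#endif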
6324
6325
6326#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
6327 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
6328
6329/** Emits code for IEM_MC_FETCH_GREG_U16. */
6330DECL_INLINE_THROW(uint32_t)
6331iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
6332{
6333 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6334 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
6335 Assert(iGReg < 16);
6336
6337 /*
6338 * We can either just load the low 16 bits of the GPR into a host register
6339 * for the variable, or we can do so via a shadow copy host register. The
6340 * latter will avoid having to reload it if it's being stored later, but
6341 * will waste a host register if it isn't touched again. Since we don't
6342 * know what's going to happen, we choose the latter for now.
6343 */
6344 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6345 kIemNativeGstRegUse_ReadOnly);
6346
6347 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6348 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6349 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
6350
6351 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6352 return off;
6353}
6354
6355
6356#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
6357 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg)
6358
6359/** Emits code for IEM_MC_FETCH_GREG_U32. */
6360DECL_INLINE_THROW(uint32_t)
6361iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
6362{
6363 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
6364 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint32_t));
6365 Assert(iGReg < 16);
6366
6367 /*
6368 * We can either just load the low 32 bits of the GPR into a host register
6369 * for the variable, or we can do so via a shadow copy host register. The
6370 * latter will avoid having to reload it if it's being stored later, but
6371 * will waste a host register if it isn't touched again. Since we don't
6372 * know what's going to happen, we choose the latter for now.
6373 */
6374 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6375 kIemNativeGstRegUse_ReadOnly);
6376
6377 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6378 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
6379 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
6380
6381 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
6382 return off;
6383}
6384
6385
6386
6387/*********************************************************************************************************************************
6388* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
6389*********************************************************************************************************************************/
6390
6391#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
6392 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
6393
6394/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
6395DECL_INLINE_THROW(uint32_t)
6396iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
6397{
6398 Assert(iGRegEx < 20);
6399 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
6400 kIemNativeGstRegUse_ForUpdate);
6401#ifdef RT_ARCH_AMD64
6402 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6403
6404 /* To the lowest byte of the register: mov r8, imm8 */
6405 if (iGRegEx < 16)
6406 {
6407 if (idxGstTmpReg >= 8)
6408 pbCodeBuf[off++] = X86_OP_REX_B;
6409 else if (idxGstTmpReg >= 4)
6410 pbCodeBuf[off++] = X86_OP_REX;
6411 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
6412 pbCodeBuf[off++] = u8Value;
6413 }
6414 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
6415 else if (idxGstTmpReg < 4)
6416 {
6417 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
6418 pbCodeBuf[off++] = u8Value;
6419 }
6420 else
6421 {
6422 /* ror reg64, 8 */
6423 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
6424 pbCodeBuf[off++] = 0xc1;
6425 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6426 pbCodeBuf[off++] = 8;
6427
6428 /* mov reg8, imm8 */
6429 if (idxGstTmpReg >= 8)
6430 pbCodeBuf[off++] = X86_OP_REX_B;
6431 else if (idxGstTmpReg >= 4)
6432 pbCodeBuf[off++] = X86_OP_REX;
6433 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
6434 pbCodeBuf[off++] = u8Value;
6435
6436 /* rol reg64, 8 */
6437 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
6438 pbCodeBuf[off++] = 0xc1;
6439 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
6440 pbCodeBuf[off++] = 8;
6441 }
6442
6443#elif defined(RT_ARCH_ARM64)
6444 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
6445 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6446 if (iGRegEx < 16)
6447 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
6448 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
6449 else
6450 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
6451 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
6452 iemNativeRegFreeTmp(pReNative, idxImmReg);
6453
6454#else
6455# error "Port me!"
6456#endif
6457
6458 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6459
6460 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
6461
6462 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6463 return off;
6464}
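
/*
 * Standalone sketch of the ror/mov/rol trick in the AMD64 path above, used
 * when the host register has no addressable high-byte form: rotating right by
 * 8 lets a plain 8-bit immediate move rewrite what are really bits 15:8, and
 * rotating back restores the rest of the register.  The helper and values are
 * made up for illustration.
 */
#if 0 /* illustration only, not part of the build */
# include <stdint.h>
# include <assert.h>

static uint64_t demoReplaceHighByte(uint64_t uReg, uint8_t bValue)
{
    uReg = (uReg >> 8) | (uReg << 56);          /* ror reg64, 8 */
    uReg = (uReg & ~UINT64_C(0xff)) | bValue;   /* mov reg8, imm8 */
    uReg = (uReg << 8) | (uReg >> 56);          /* rol reg64, 8 */
    return uReg;
}

int main(void)
{
    assert(demoReplaceHighByte(UINT64_C(0x1122334455667788), 0xaa) == UINT64_C(0x112233445566aa88));
    return 0;
}
#endif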
6465
6466
6467#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
6468 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
6469
6470/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
6471DECL_INLINE_THROW(uint32_t)
6472iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
6473{
6474 Assert(iGReg < 16);
6475 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6476 kIemNativeGstRegUse_ForUpdate);
6477#ifdef RT_ARCH_AMD64
6478 /* mov reg16, imm16 */
6479 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6); /* worst case: 0x66 + REX.B + opcode + imm16 */
6480 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6481 if (idxGstTmpReg >= 8)
6482 pbCodeBuf[off++] = X86_OP_REX_B;
6483 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
6484 pbCodeBuf[off++] = RT_BYTE1(uValue);
6485 pbCodeBuf[off++] = RT_BYTE2(uValue);
6486
6487#elif defined(RT_ARCH_ARM64)
6488 /* movk xdst, #uValue, lsl #0 */
6489 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6490 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
6491
6492#else
6493# error "Port me!"
6494#endif
6495
6496 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6497
6498 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6499 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6500 return off;
6501}
6502
6503
6504#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
6505 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
6506
6507/** Emits code for IEM_MC_STORE_GREG_U16. */
6508DECL_INLINE_THROW(uint32_t)
6509iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6510{
6511 Assert(iGReg < 16);
6512 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6513
6514 /*
6515 * If it's a constant value (unlikely) we treat this as an
6516 * IEM_MC_STORE_GREG_U16_CONST statement.
6517 */
6518 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6519 { /* likely */ }
6520 else
6521 {
6522 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
6523 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6524 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
6525 }
6526
6527 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6528 kIemNativeGstRegUse_ForUpdate);
6529
6530#ifdef RT_ARCH_AMD64
6531 /* mov reg16, reg16 or [mem16] */
6532 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6533 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6534 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6535 {
6536 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
6537 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
6538 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
6539 pbCodeBuf[off++] = 0x8b;
6540 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
6541 }
6542 else
6543 {
6544 AssertStmt(pReNative->Core.aVars[idxValueVar].idxStackSlot != UINT8_MAX,
6545 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
6546 if (idxGstTmpReg >= 8)
6547 pbCodeBuf[off++] = X86_OP_REX_R;
6548 pbCodeBuf[off++] = 0x8b;
6549 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeVarCalcBpDisp(pReNative, idxValueVar), pReNative);
6550 }
6551
6552#elif defined(RT_ARCH_ARM64)
6553 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
6554 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off);
6555 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6556 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
6557
6558#else
6559# error "Port me!"
6560#endif
6561
6562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6563
6564 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6565 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6566 return off;
6567}
6568
6569
6570#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
6571 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
6572
6573/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
6574DECL_INLINE_THROW(uint32_t)
6575iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
6576{
6577 Assert(iGReg < 16);
6578 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6579 kIemNativeGstRegUse_ForFullWrite);
6580 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
6581 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6582 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6583 return off;
6584}
6585
6586
6587#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
6588 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
6589
6590/** Emits code for IEM_MC_STORE_GREG_U32. */
6591DECL_INLINE_THROW(uint32_t)
6592iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6593{
6594 Assert(iGReg < 16);
6595 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6596
6597 /*
6598 * If it's a constant value (unlikely) we treat this as an
6599 * IEM_MC_STORE_GREG_U32_CONST statement.
6600 */
6601 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6602 { /* likely */ }
6603 else
6604 {
6605 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
6606 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6607 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
6608 }
6609
6610 /*
6611 * For the rest we allocate a guest register for the variable and write
6612 * it to the CPUMCTX structure.
6613 */
6614 uint8_t const idxVarReg = iemNativeVarAllocRegisterForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
6615 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6616#ifdef VBOX_STRICT
6617 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
6618#endif
6619 return off;
6620}
6621
6622
6623
6624#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
6625 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
6626
6627/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
6628DECL_INLINE_THROW(uint32_t)
6629iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
6630{
6631 Assert(iGReg < 16);
6632 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6633 kIemNativeGstRegUse_ForUpdate);
6634 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
6635 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6636 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6637 return off;
6638}
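
/*
 * Minimal standalone sketch of what the emitter above relies on: copying a
 * register to itself as a 32-bit operand zero-extends, clearing bits 63:32
 * (presumably "mov r32, r32" on amd64 and a 32-bit "mov w, w" on arm64).
 */
#if 0 /* illustration only, not part of the build */
# include <stdint.h>
# include <assert.h>

int main(void)
{
    uint64_t uGstReg = UINT64_C(0x123456789abcdef0);
    uGstReg = (uint32_t)uGstReg;                     /* the 32-bit register move */
    assert(uGstReg == UINT64_C(0x000000009abcdef0));
    return 0;
}
#endif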
6639
6640
6641/*********************************************************************************************************************************
6642* General purpose register manipulation (add, sub). *
6643*********************************************************************************************************************************/
6644
6645#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
6646 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
6647
6648/** Emits code for IEM_MC_SUB_GREG_U16. */
6649DECL_INLINE_THROW(uint32_t)
6650iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
6651{
6652 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6653 kIemNativeGstRegUse_ForUpdate);
6654
6655#ifdef RT_ARCH_AMD64
6656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6); /* worst case: 0x66 + REX.B + 0x81 + modrm + imm16 */
6657 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6658 if (idxGstTmpReg >= 8)
6659 pbCodeBuf[off++] = X86_OP_REX_B;
6660 if (uSubtrahend == 1)
6661 {
6662 pbCodeBuf[off++] = 0xff; /* dec */
6663 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6664 }
6665 else
6666 {
6667 pbCodeBuf[off++] = 0x81;
6668 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6669 pbCodeBuf[off++] = uSubtrahend;
6670 pbCodeBuf[off++] = 0;
6671 }
6672
6673#else
6674 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6675 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6676
6677 /* sub tmp, gstgrp, uSubtrahend */
6678 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
6679
6680 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
6681 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
6682
6683 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6684#endif
6685
6686 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6687
6688 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6689
6690 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6691 return off;
6692}
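
/*
 * Standalone sketch of the arm64 strategy above: do the subtraction in a
 * temporary and then bit-field-insert the low 16 bits back, so bits 63:16 of
 * the guest register are left untouched (matching a 16-bit "sub" on x86).
 * The helper and test value are made up for illustration.
 */
#if 0 /* illustration only, not part of the build */
# include <stdint.h>
# include <assert.h>

static uint64_t demoSubGReg16(uint64_t uGstReg, uint8_t uSubtrahend)
{
    uint64_t const uTmp = uGstReg - uSubtrahend;                      /* sub tmp, gstgrp, uSubtrahend */
    return (uGstReg & ~UINT64_C(0xffff)) | (uTmp & UINT64_C(0xffff)); /* bfi gstgrp, tmp, #0, #16 */
}

int main(void)
{
    assert(demoSubGReg16(UINT64_C(0xffffffff00000001), 2) == UINT64_C(0xffffffff0000ffff));
    return 0;
}
#endif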
6693
6694
6695#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
6696 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
6697
6698#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
6699 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
6700
6701/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
6702DECL_INLINE_THROW(uint32_t)
6703iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
6704{
6705 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
6706 kIemNativeGstRegUse_ForUpdate);
6707
6708#ifdef RT_ARCH_AMD64
6709 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7); /* worst case: REX + 0x81 + modrm + imm32 */
6710 if (f64Bit)
6711 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
6712 else if (idxGstTmpReg >= 8)
6713 pbCodeBuf[off++] = X86_OP_REX_B;
6714 if (uSubtrahend == 1)
6715 {
6716 /* dec */
6717 pbCodeBuf[off++] = 0xff;
6718 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6719 }
6720 else if (uSubtrahend < 128)
6721 {
6722 pbCodeBuf[off++] = 0x83; /* sub */
6723 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6724 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6725 }
6726 else
6727 {
6728 pbCodeBuf[off++] = 0x81; /* sub */
6729 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6730 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6731 pbCodeBuf[off++] = 0;
6732 pbCodeBuf[off++] = 0;
6733 pbCodeBuf[off++] = 0;
6734 }
6735
6736#else
6737 /* sub tmp, gstgrp, uSubtrahend */
6738 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6739 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
6740
6741#endif
6742
6743 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6744
6745 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6746
6747 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6748 return off;
6749}
6750
6751
6752
6753/*********************************************************************************************************************************
6754* Register references. *
6755*********************************************************************************************************************************/
6756
6757#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6758 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6759
6760#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6761 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6762
6763/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6764DECL_INLINE_THROW(uint32_t)
6765iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6766{
6767 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6768 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
6769 Assert(iGRegEx < 20);
6770
6771 if (iGRegEx < 16)
6772 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6773 else
6774 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6775
6776 /* If we've delayed writing back the register value, flush it now. */
6777 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6778
6779 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6780 if (!fConst)
6781 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6782
6783 return off;
6784}
6785
6786#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6787 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6788
6789#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6790 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6791
6792#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6793 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6794
6795#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6796 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6797
6798#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6799 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6800
6801#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6802 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6803
6804#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6805 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6806
6807#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6808 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6809
6810#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6811 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6812
6813#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6814 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6815
6816/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6817DECL_INLINE_THROW(uint32_t)
6818iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6819{
6820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6821 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
6822 Assert(iGReg < 16);
6823
6824 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6825
6826 /* If we've delayed writing back the register value, flush it now. */
6827 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6828
6829 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6830 if (!fConst)
6831 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6832
6833 return off;
6834}
6835
6836
6837#define IEM_MC_REF_EFLAGS(a_pEFlags) \
6838 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
6839
6840/** Handles IEM_MC_REF_EFLAGS. */
6841DECL_INLINE_THROW(uint32_t)
6842iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
6843{
6844 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6845 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
6846
6847 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6848
6849 /* If we've delayed writing back the register value, flush it now. */
6850 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6851
6852 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6853 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6854
6855 return off;
6856}
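/* Illustration (hedged, not from this file): in the MC blocks that feed this
   recompiler, the reference statements above are typically paired with an
   assembly-helper call, along the lines of:
       IEM_MC_REF_GREG_U64(pu64Dst, IEM_GET_MODRM_RM(pVCpu, bRm));
       IEM_MC_REF_EFLAGS(pEFlags);
       IEM_MC_CALL_VOID_AIMPL_3(pfnAImpl, pu64Dst, u64Src, pEFlags);
   Because the references are not const, the guest shadow copies are flushed above,
   so the helper reads and writes the values directly in CPUMCTX. */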
6857
6858
6859/*********************************************************************************************************************************
6860* Effective Address Calculation *
6861*********************************************************************************************************************************/
6862#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6863 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6864
6865/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6866 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6867DECL_INLINE_THROW(uint32_t)
6868iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6869 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6870{
6871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6872
6873 /*
6874 * Handle the disp16 form with no registers first.
6875 *
6876 * Convert to an immediate value, as that'll delay the register allocation
6877 * and assignment till the memory access / call / whatever and we can use
6878 * a more appropriate register (or none at all).
6879 */
6880 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6881 {
6882 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6883 return off;
6884 }
6885
6886    /* Determine the displacement. */
6887 uint16_t u16EffAddr;
6888 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6889 {
6890 case 0: u16EffAddr = 0; break;
6891 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6892 case 2: u16EffAddr = u16Disp; break;
6893 default: AssertFailedStmt(u16EffAddr = 0);
6894 }
6895
6896 /* Determine the registers involved. */
6897 uint8_t idxGstRegBase;
6898 uint8_t idxGstRegIndex;
6899 switch (bRm & X86_MODRM_RM_MASK)
6900 {
6901 case 0:
6902 idxGstRegBase = X86_GREG_xBX;
6903 idxGstRegIndex = X86_GREG_xSI;
6904 break;
6905 case 1:
6906 idxGstRegBase = X86_GREG_xBX;
6907 idxGstRegIndex = X86_GREG_xDI;
6908 break;
6909 case 2:
6910 idxGstRegBase = X86_GREG_xBP;
6911 idxGstRegIndex = X86_GREG_xSI;
6912 break;
6913 case 3:
6914 idxGstRegBase = X86_GREG_xBP;
6915 idxGstRegIndex = X86_GREG_xDI;
6916 break;
6917 case 4:
6918 idxGstRegBase = X86_GREG_xSI;
6919 idxGstRegIndex = UINT8_MAX;
6920 break;
6921 case 5:
6922 idxGstRegBase = X86_GREG_xDI;
6923 idxGstRegIndex = UINT8_MAX;
6924 break;
6925 case 6:
6926 idxGstRegBase = X86_GREG_xBP;
6927 idxGstRegIndex = UINT8_MAX;
6928 break;
6929#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6930 default:
6931#endif
6932 case 7:
6933 idxGstRegBase = X86_GREG_xBX;
6934 idxGstRegIndex = UINT8_MAX;
6935 break;
6936 }
6937
6938 /*
6939 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6940 */
6941 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
6942 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6943 kIemNativeGstRegUse_ReadOnly);
6944 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6945 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6946 kIemNativeGstRegUse_ReadOnly)
6947 : UINT8_MAX;
6948#ifdef RT_ARCH_AMD64
6949 if (idxRegIndex == UINT8_MAX)
6950 {
6951 if (u16EffAddr == 0)
6952 {
6953            /* movzx ret, base */
6954 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6955 }
6956 else
6957 {
6958 /* lea ret32, [base64 + disp32] */
6959 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6960 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6961 if (idxRegRet >= 8 || idxRegBase >= 8)
6962 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6963 pbCodeBuf[off++] = 0x8d;
6964 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6965 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6966 else
6967 {
6968 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6969 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6970 }
6971 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6972 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6973 pbCodeBuf[off++] = 0;
6974 pbCodeBuf[off++] = 0;
6975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6976
6977 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6978 }
6979 }
6980 else
6981 {
6982 /* lea ret32, [index64 + base64 (+ disp32)] */
6983 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6984 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6985 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6986 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6987 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6988 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6989 pbCodeBuf[off++] = 0x8d;
6990 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6991 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6992 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6993 if (bMod == X86_MOD_MEM4)
6994 {
6995 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6996 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6997 pbCodeBuf[off++] = 0;
6998 pbCodeBuf[off++] = 0;
6999 }
7000 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7001 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
7002 }
7003
7004#elif defined(RT_ARCH_ARM64)
7005 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7006 if (u16EffAddr == 0)
7007 {
7008 if (idxRegIndex == UINT8_MAX)
7009 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
7010 else
7011 {
7012 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
7013 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
7014 }
7015 }
7016 else
7017 {
7018 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
7019 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
7020 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
7021 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
7022 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
7023 else
7024 {
7025 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
7026 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
7027 }
7028 if (idxRegIndex != UINT8_MAX)
7029 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
7030 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
7031 }
7032
7033#else
7034# error "port me"
7035#endif
7036
7037 if (idxRegIndex != UINT8_MAX)
7038 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7039 iemNativeRegFreeTmp(pReNative, idxRegBase);
7040 return off;
7041}
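/* Illustration (hedged example, hypothetical values): with bRm = 0x42 (mod=1, reg=0,
   rm=2) and u16Disp = 0x0080 (disp8 = -128), the code above selects base=BP, index=SI
   and u16EffAddr = 0xFF80, so the emitted native code computes
   (uint16_t)(BP + SI - 128), i.e. the classic [bp+si-128] 16-bit addressing form. */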
7042
7043
7044#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
7045 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
7046
7047/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
7048 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
7049DECL_INLINE_THROW(uint32_t)
7050iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7051 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
7052{
7053 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
7054
7055 /*
7056 * Handle the disp32 form with no registers first.
7057 *
7058 * Convert to an immediate value, as that'll delay the register allocation
7059 * and assignment till the memory access / call / whatever and we can use
7060 * a more appropriate register (or none at all).
7061 */
7062 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
7063 {
7064 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
7065 return off;
7066 }
7067
7068 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
7069 uint32_t u32EffAddr = 0;
7070 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
7071 {
7072 case 0: break;
7073 case 1: u32EffAddr = (int8_t)u32Disp; break;
7074 case 2: u32EffAddr = u32Disp; break;
7075 default: AssertFailed();
7076 }
7077
7078 /* Get the register (or SIB) value. */
7079 uint8_t idxGstRegBase = UINT8_MAX;
7080 uint8_t idxGstRegIndex = UINT8_MAX;
7081 uint8_t cShiftIndex = 0;
7082 switch (bRm & X86_MODRM_RM_MASK)
7083 {
7084 case 0: idxGstRegBase = X86_GREG_xAX; break;
7085 case 1: idxGstRegBase = X86_GREG_xCX; break;
7086 case 2: idxGstRegBase = X86_GREG_xDX; break;
7087 case 3: idxGstRegBase = X86_GREG_xBX; break;
7088 case 4: /* SIB */
7089 {
7090            /* index w/ scaling. */
7091 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
7092 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
7093 {
7094 case 0: idxGstRegIndex = X86_GREG_xAX; break;
7095 case 1: idxGstRegIndex = X86_GREG_xCX; break;
7096 case 2: idxGstRegIndex = X86_GREG_xDX; break;
7097 case 3: idxGstRegIndex = X86_GREG_xBX; break;
7098 case 4: cShiftIndex = 0; /*no index*/ break;
7099 case 5: idxGstRegIndex = X86_GREG_xBP; break;
7100 case 6: idxGstRegIndex = X86_GREG_xSI; break;
7101 case 7: idxGstRegIndex = X86_GREG_xDI; break;
7102 }
7103
7104 /* base */
7105 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
7106 {
7107 case 0: idxGstRegBase = X86_GREG_xAX; break;
7108 case 1: idxGstRegBase = X86_GREG_xCX; break;
7109 case 2: idxGstRegBase = X86_GREG_xDX; break;
7110 case 3: idxGstRegBase = X86_GREG_xBX; break;
7111 case 4:
7112 idxGstRegBase = X86_GREG_xSP;
7113 u32EffAddr += uSibAndRspOffset >> 8;
7114 break;
7115 case 5:
7116 if ((bRm & X86_MODRM_MOD_MASK) != 0)
7117 idxGstRegBase = X86_GREG_xBP;
7118 else
7119 {
7120 Assert(u32EffAddr == 0);
7121 u32EffAddr = u32Disp;
7122 }
7123 break;
7124 case 6: idxGstRegBase = X86_GREG_xSI; break;
7125 case 7: idxGstRegBase = X86_GREG_xDI; break;
7126 }
7127 break;
7128 }
7129 case 5: idxGstRegBase = X86_GREG_xBP; break;
7130 case 6: idxGstRegBase = X86_GREG_xSI; break;
7131 case 7: idxGstRegBase = X86_GREG_xDI; break;
7132 }
7133
7134 /*
7135 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
7136 * the start of the function.
7137 */
7138 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
7139 {
7140 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
7141 return off;
7142 }
7143
7144 /*
7145 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
7146 */
7147 uint8_t const idxRegRet = iemNativeVarAllocRegister(pReNative, idxVarRet, &off);
7148 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
7149 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
7150 kIemNativeGstRegUse_ReadOnly);
7151 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
7152 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
7153 kIemNativeGstRegUse_ReadOnly);
7154
7155 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
7156 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
7157 {
7158 idxRegBase = idxRegIndex;
7159 idxRegIndex = UINT8_MAX;
7160 }
7161
7162#ifdef RT_ARCH_AMD64
7163 if (idxRegIndex == UINT8_MAX)
7164 {
7165 if (u32EffAddr == 0)
7166 {
7167 /* mov ret, base */
7168 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
7169 }
7170 else
7171 {
7172 /* lea ret32, [base64 + disp32] */
7173 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
7174 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7175 if (idxRegRet >= 8 || idxRegBase >= 8)
7176 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
7177 pbCodeBuf[off++] = 0x8d;
7178 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7179 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7180 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7181 else
7182 {
7183 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7184 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7185 }
7186 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7187 if (bMod == X86_MOD_MEM4)
7188 {
7189 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7190 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7191 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7192 }
7193 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7194 }
7195 }
7196 else
7197 {
7198 /* lea ret32, [index64 << cShiftIndex (+ base64) (+ disp32)] */
7199 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7200 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7201 if (idxRegRet >= 8 || (idxRegBase >= 8 && idxRegBase != UINT8_MAX) || idxRegIndex >= 8)
7202 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7203 | (idxRegBase >= 8 && idxRegBase != UINT8_MAX ? X86_OP_REX_B : 0)
7204 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
7205 pbCodeBuf[off++] = 0x8d;
7206 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP && idxRegBase != UINT8_MAX ? X86_MOD_MEM0
7207 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7208 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7209 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase != UINT8_MAX ? idxRegBase & 7 : 5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7210 if (bMod != X86_MOD_MEM0)
7211 {
7212 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7213 if (bMod == X86_MOD_MEM4)
7214 {
7215 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7216 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7217 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7218 }
7219 }
7220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7221 }
7222
7223#elif defined(RT_ARCH_ARM64)
7224 if (u32EffAddr == 0)
7225 {
7226 if (idxRegIndex == UINT8_MAX)
7227 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
7228 else if (idxRegBase == UINT8_MAX)
7229 {
7230 if (cShiftIndex == 0)
7231 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
7232 else
7233 {
7234 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7235 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
7236 }
7237 }
7238 else
7239 {
7240 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7242 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
7243 }
7244 }
7245 else
7246 {
7247 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
7248 {
7249 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7250 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
7251 }
7252 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
7253 {
7254 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7255 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
7256 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
7257 }
7258 else
7259 {
7260 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
7261                uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7263 if (idxRegBase != UINT8_MAX)
7264 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
7265 }
7266 if (idxRegIndex != UINT8_MAX)
7267 {
7268 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7269 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7270                                                          false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
7271 }
7272 }
7273
7274#else
7275# error "port me"
7276#endif
7277
7278 if (idxRegIndex != UINT8_MAX)
7279 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7280 if (idxRegBase != UINT8_MAX)
7281 iemNativeRegFreeTmp(pReNative, idxRegBase);
7282 return off;
7283}
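/* Illustration (hedged example, hypothetical values): with bRm = 0x44 (mod=1, rm=4 ->
   SIB follows), a SIB byte of 0x88 in the low bits of uSibAndRspOffset (scale=2,
   index=ECX, base=EAX) and u32Disp = 0x10, the code above ends up with
   idxGstRegBase=EAX, idxGstRegIndex=ECX, cShiftIndex=2 and u32EffAddr=0x10, so the
   emitted native code computes (uint32_t)(EAX + (ECX << 2) + 0x10),
   i.e. [eax + ecx*4 + 0x10]. */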
7284
7285
7286#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7287 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff)
7288
7289#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7290 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 64)
7291
7292#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
7293 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm, a_GCPtrEff, 32)
7294
7295
7296
7297/*********************************************************************************************************************************
7298* Builtin functions *
7299*********************************************************************************************************************************/
7300
7301/**
7302 * Built-in function that calls a C-implementation function taking zero arguments.
7303 */
7304static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
7305{
7306 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
7307 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
7308 uint64_t const fGstShwFlush = (uint8_t)pCallEntry->auParams[2];
7309 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
7310}
7311
7312
7313/**
7314 * Built-in function that checks for pending, deliverable interrupts and for
7315 * pending forced-action flags.
7316 *
7317 * This triggers after the completion of an instruction, so EIP is already at
7318 * the next instruction. If an IRQ or important FF is pending, this will return
7319 * a non-zero status that stops TB execution.
7320 */
7321static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
7322{
7323 RT_NOREF(pCallEntry);
7324
7325 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
7326 and I'm too lazy to create a 'Fixed' version of that one. */
7327 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
7328 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
7329
7330 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
7331
7332 /* Again, we need to load the extended EFLAGS before we actually need them
7333 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
7334 loaded them inside the check, as the shadow state would not be correct
7335 when the code branches before the load. Ditto PC. */
7336 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7337 kIemNativeGstRegUse_ReadOnly);
7338
7339 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
7340
7341 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7342
7343 /*
7344 * Start by checking the local forced actions of the EMT we're on for IRQs
7345 * and other FFs that needs servicing.
7346 */
7347 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
7348 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
7349 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
7350 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
7351 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
7352 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
7353 | VMCPU_FF_TLB_FLUSH
7354 | VMCPU_FF_UNHALT ),
7355 true /*fSetFlags*/);
7356    /* If we end up with ZERO in idxTmpReg there is nothing to do. */
7357 uint32_t const offFixupJumpToVmCheck1 = off;
7358 off = iemNativeEmitJzToFixed(pReNative, off, 0);
7359
7360    /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
7361       these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
7362 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
7363 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
7364 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
7365 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
7366
7367    /* So, it's only interrupt-related FFs and we need to see if IRQs are being
7368 suppressed by the CPU or not. */
7369 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
7370 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
7371 idxLabelReturnBreak);
7372
7373 /* We've got shadow flags set, so we must check that the PC they are valid
7374 for matches our current PC value. */
7375 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
7376 * a register. */
7377 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
7378 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
7379
7380 /*
7381 * Now check the force flags of the VM.
7382 */
7383 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
7384 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
7385 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
7386 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
7387 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
7388 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
7389
7390 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
7391
7392 /*
7393 * We're good, no IRQs or FFs pending.
7394 */
7395 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7396 iemNativeRegFreeTmp(pReNative, idxEflReg);
7397 iemNativeRegFreeTmp(pReNative, idxPcReg);
7398
7399 return off;
7400}
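/* Illustration (hedged sketch of the emitted control flow, not actual generated code):
 *      mov   tmp, [pVCpu->fLocalForcedActions]
 *      and   tmp, VMCPU_FF_ALL_MASK & ~(SYNC_CR3 | SYNC_CR3_NON_GLOBAL | TLB_FLUSH | UNHALT)
 *      jz    VmCheck                       ; nothing local pending
 *      and   tmp, ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
 *      jnz   ReturnBreak                   ; non-interrupt FFs -> leave the TB
 *      test  efl, X86_EFL_IF
 *      jz    VmCheck                       ; IF clear -> IRQs masked anyway
 *      test  efl, CPUMCTX_INHIBIT_SHADOW
 *      jz    ReturnBreak                   ; no inhibit shadow -> IRQ deliverable
 *      cmp   [uRipInhibitInt], pc
 *      jne   ReturnBreak                   ; stale shadow -> IRQ deliverable
 *  VmCheck:
 *      mov   tmp, [pVM->fGlobalForcedActions]
 *      and   tmp, VM_FF_ALL_MASK
 *      jnz   ReturnBreak
 */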
7401
7402
7403/**
7404 * Built-in function that checks if IEMCPU::fExec has the expected value.
7405 */
7406static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
7407{
7408 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
7409 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7410
7411 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
7412 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
7413 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
7414 kIemNativeLabelType_ReturnBreak);
7415 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7416 return off;
7417}
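/* Illustration (hedged sketch): the native code emitted above is essentially
 *      mov   tmp32, [pVCpu->iem.s.fExec]
 *      and   tmp32, IEMTB_F_KEY_MASK
 *      cmp   tmp32, fExpectedExec & IEMTB_F_KEY_MASK
 *      jne   ReturnBreak
 * so the TB is abandoned as soon as the execution mode no longer matches the one it
 * was compiled for. */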
7418
7419
7420
7421/*********************************************************************************************************************************
7422* The native code generator functions for each MC block. *
7423*********************************************************************************************************************************/
7424
7425
7426/*
7427 * Include g_apfnIemNativeRecompileFunctions and associated functions.
7428 *
7429 * This should probably live in its own file later, but let's see what the
7430 * compile times turn out to be first.
7431 */
7432#include "IEMNativeFunctions.cpp.h"
7433
7434
7435
7436/*********************************************************************************************************************************
7437* Recompiler Core. *
7438*********************************************************************************************************************************/
7439
7440
7441/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
7442static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
7443{
7444 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
7445 pDis->cbCachedInstr += cbMaxRead;
7446 RT_NOREF(cbMinRead);
7447 return VERR_NO_DATA;
7448}
7449
7450
7451/**
7452 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
7453 * @returns pszBuf.
7454 * @param fFlags The flags.
7455 * @param pszBuf The output buffer.
7456 * @param cbBuf The output buffer size. At least 32 bytes.
7457 */
7458DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
7459{
7460 Assert(cbBuf >= 32);
7461 static RTSTRTUPLE const s_aModes[] =
7462 {
7463 /* [00] = */ { RT_STR_TUPLE("16BIT") },
7464 /* [01] = */ { RT_STR_TUPLE("32BIT") },
7465 /* [02] = */ { RT_STR_TUPLE("!2!") },
7466 /* [03] = */ { RT_STR_TUPLE("!3!") },
7467 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
7468 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
7469 /* [06] = */ { RT_STR_TUPLE("!6!") },
7470 /* [07] = */ { RT_STR_TUPLE("!7!") },
7471 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
7472 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
7473 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
7474 /* [0b] = */ { RT_STR_TUPLE("!b!") },
7475 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
7476 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
7477 /* [0e] = */ { RT_STR_TUPLE("!e!") },
7478 /* [0f] = */ { RT_STR_TUPLE("!f!") },
7479 /* [10] = */ { RT_STR_TUPLE("!10!") },
7480 /* [11] = */ { RT_STR_TUPLE("!11!") },
7481 /* [12] = */ { RT_STR_TUPLE("!12!") },
7482 /* [13] = */ { RT_STR_TUPLE("!13!") },
7483 /* [14] = */ { RT_STR_TUPLE("!14!") },
7484 /* [15] = */ { RT_STR_TUPLE("!15!") },
7485 /* [16] = */ { RT_STR_TUPLE("!16!") },
7486 /* [17] = */ { RT_STR_TUPLE("!17!") },
7487 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
7488 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
7489 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
7490 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
7491 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
7492 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
7493 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
7494 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
7495 };
7496 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
7497 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
7498 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
7499
7500 pszBuf[off++] = ' ';
7501 pszBuf[off++] = 'C';
7502 pszBuf[off++] = 'P';
7503 pszBuf[off++] = 'L';
7504 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
7505 Assert(off < 32);
7506
7507 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
7508
7509 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
7510 {
7511 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
7512 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
7513 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
7514 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
7515 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
7516 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
7517 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
7518 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
7519 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
7520 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
7521 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
7522 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
7523 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
7524 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
7525 };
7526 if (fFlags)
7527 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
7528 if (s_aFlags[i].fFlag & fFlags)
7529 {
7530 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
7531 pszBuf[off++] = ' ';
7532 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
7533 off += s_aFlags[i].cchName;
7534 fFlags &= ~s_aFlags[i].fFlag;
7535 if (!fFlags)
7536 break;
7537 }
7538 pszBuf[off] = '\0';
7539
7540 return pszBuf;
7541}
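/* Illustration (hedged, hypothetical values): the function renders the mode string,
   the CPL digit and then one token per remaining set flag, so a TB compiled for flat
   32-bit ring-3 code with CS limit checks might come out as
   "32BIT_FLAT CPL3 CS_LIM_CHECKS TYPE_NATIVE". */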
7542
7543
7544DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
7545{
7546 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
7547
7548 char szDisBuf[512];
7549 DISSTATE Dis;
7550 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
7551 uint32_t const cNative = pTb->Native.cInstructions;
7552 uint32_t offNative = 0;
7553#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7554 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
7555#endif
7556 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
7557 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
7558 : DISCPUMODE_64BIT;
7559#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7560 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
7561#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7562 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
7563#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
7564# error "Port me"
7565#else
7566 csh hDisasm = ~(size_t)0;
7567# if defined(RT_ARCH_AMD64)
7568 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
7569# elif defined(RT_ARCH_ARM64)
7570 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
7571# else
7572# error "Port me"
7573# endif
7574 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
7575#endif
7576
7577 /*
7578 * Print TB info.
7579 */
7580 pHlp->pfnPrintf(pHlp,
7581 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
7582 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
7583 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
7584 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
7585#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7586 if (pDbgInfo && pDbgInfo->cEntries > 1)
7587 {
7588 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
7589
7590 /*
7591 * This disassembly is driven by the debug info which follows the native
7592             * code and indicates where the next guest instruction starts, where
7593             * labels are, and similar things.
7594 */
7595 uint32_t idxThreadedCall = 0;
7596 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
7597 uint8_t idxRange = UINT8_MAX;
7598 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
7599 uint32_t offRange = 0;
7600 uint32_t offOpcodes = 0;
7601 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
7602 uint32_t const cDbgEntries = pDbgInfo->cEntries;
7603 uint32_t iDbgEntry = 1;
7604 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
7605
7606 while (offNative < cNative)
7607 {
7608 /* If we're at or have passed the point where the next chunk of debug
7609 info starts, process it. */
7610 if (offDbgNativeNext <= offNative)
7611 {
7612 offDbgNativeNext = UINT32_MAX;
7613 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
7614 {
7615 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
7616 {
7617 case kIemTbDbgEntryType_GuestInstruction:
7618 {
7619 /* Did the exec flag change? */
7620 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
7621 {
7622 pHlp->pfnPrintf(pHlp,
7623 " fExec change %#08x -> %#08x %s\n",
7624 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
7625 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
7626 szDisBuf, sizeof(szDisBuf)));
7627 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
7628 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
7629 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
7630 : DISCPUMODE_64BIT;
7631 }
7632
7633                        /* New opcode range? We need to fend off a spurious debug info entry here for cases
7634 where the compilation was aborted before the opcode was recorded and the actual
7635 instruction was translated to a threaded call. This may happen when we run out
7636 of ranges, or when some complicated interrupts/FFs are found to be pending or
7637 similar. So, we just deal with it here rather than in the compiler code as it
7638 is a lot simpler to do up here. */
7639 if ( idxRange == UINT8_MAX
7640 || idxRange >= cRanges
7641 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
7642 {
7643 idxRange += 1;
7644 if (idxRange < cRanges)
7645 offRange = 0;
7646 else
7647 continue;
7648 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
7649 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
7650 + (pTb->aRanges[idxRange].idxPhysPage == 0
7651 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
7652 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
7653 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
7654 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
7655 pTb->aRanges[idxRange].idxPhysPage);
7656 }
7657
7658 /* Disassemble the instruction. */
7659 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
7660 uint32_t cbInstr = 1;
7661 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
7662 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
7663 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
7664 if (RT_SUCCESS(rc))
7665 {
7666 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
7667 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
7668 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7669 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7670
7671 static unsigned const s_offMarker = 55;
7672 static char const s_szMarker[] = " ; <--- guest";
7673 if (cch < s_offMarker)
7674 {
7675 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
7676 cch = s_offMarker;
7677 }
7678 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
7679 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
7680
7681 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
7682 }
7683 else
7684 {
7685 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
7686 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
7687 cbInstr = 1;
7688 }
7689 GCPhysPc += cbInstr;
7690 offOpcodes += cbInstr;
7691 offRange += cbInstr;
7692 continue;
7693 }
7694
7695 case kIemTbDbgEntryType_ThreadedCall:
7696 pHlp->pfnPrintf(pHlp,
7697 " Call #%u to %s (%u args)%s\n",
7698 idxThreadedCall,
7699 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
7700 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
7701 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
7702 idxThreadedCall++;
7703 continue;
7704
7705 case kIemTbDbgEntryType_GuestRegShadowing:
7706 {
7707 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
7708 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
7709 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
7710 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
7711 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
7712 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
7713 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
7714 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
7715 else
7716 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
7717 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
7718 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
7719 continue;
7720 }
7721
7722 case kIemTbDbgEntryType_Label:
7723 {
7724 const char *pszName = "what_the_fudge";
7725 const char *pszComment = "";
7726 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
7727 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
7728 {
7729 case kIemNativeLabelType_Return:
7730 pszName = "Return";
7731 break;
7732 case kIemNativeLabelType_ReturnBreak:
7733 pszName = "ReturnBreak";
7734 break;
7735 case kIemNativeLabelType_ReturnWithFlags:
7736 pszName = "ReturnWithFlags";
7737 break;
7738 case kIemNativeLabelType_NonZeroRetOrPassUp:
7739 pszName = "NonZeroRetOrPassUp";
7740 break;
7741 case kIemNativeLabelType_RaiseGp0:
7742 pszName = "RaiseGp0";
7743 break;
7744 case kIemNativeLabelType_If:
7745 pszName = "If";
7746 fNumbered = true;
7747 break;
7748 case kIemNativeLabelType_Else:
7749 pszName = "Else";
7750 fNumbered = true;
7751 pszComment = " ; regs state restored pre-if-block";
7752 break;
7753 case kIemNativeLabelType_Endif:
7754 pszName = "Endif";
7755 fNumbered = true;
7756 break;
7757 case kIemNativeLabelType_CheckIrq:
7758 pszName = "CheckIrq_CheckVM";
7759 fNumbered = true;
7760 break;
7761 case kIemNativeLabelType_Invalid:
7762 case kIemNativeLabelType_End:
7763 break;
7764 }
7765 if (fNumbered)
7766 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
7767 else
7768 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
7769 continue;
7770 }
7771
7772 case kIemTbDbgEntryType_NativeOffset:
7773 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
7774 Assert(offDbgNativeNext > offNative);
7775 break;
7776
7777 default:
7778 AssertFailed();
7779 }
7780 iDbgEntry++;
7781 break;
7782 }
7783 }
7784
7785 /*
7786 * Disassemble the next native instruction.
7787 */
7788 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
7789# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
7790 uint32_t cbInstr = sizeof(paNative[0]);
7791 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
7792 if (RT_SUCCESS(rc))
7793 {
7794# if defined(RT_ARCH_AMD64)
7795 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
7796 {
7797 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
7798 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
7799 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
7800 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
7801 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
7802 uInfo & 0x8000 ? " - recompiled" : "");
7803 else
7804 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
7805 }
7806 else
7807# endif
7808 {
7809# ifdef RT_ARCH_AMD64
7810 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
7811 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
7812 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7813 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7814# elif defined(RT_ARCH_ARM64)
7815 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
7816 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7817 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7818# else
7819# error "Port me"
7820# endif
7821 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
7822 }
7823 }
7824 else
7825 {
7826# if defined(RT_ARCH_AMD64)
7827 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
7828 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
7829# elif defined(RT_ARCH_ARM64)
7830 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
7831# else
7832# error "Port me"
7833# endif
7834 cbInstr = sizeof(paNative[0]);
7835 }
7836 offNative += cbInstr / sizeof(paNative[0]);
7837
7838# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
7839 cs_insn *pInstr;
7840 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
7841 (uintptr_t)pNativeCur, 1, &pInstr);
7842 if (cInstrs > 0)
7843 {
7844 Assert(cInstrs == 1);
7845# if defined(RT_ARCH_AMD64)
7846 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
7847 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
7848# else
7849 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
7850 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
7851# endif
7852 offNative += pInstr->size / sizeof(*pNativeCur);
7853 cs_free(pInstr, cInstrs);
7854 }
7855 else
7856 {
7857# if defined(RT_ARCH_AMD64)
7858 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
7859                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
7860# else
7861 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
7862# endif
7863 offNative++;
7864 }
7865# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
7866 }
7867 }
7868 else
7869#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
7870 {
7871 /*
7872 * No debug info, just disassemble the x86 code and then the native code.
7873 *
7874 * First the guest code:
7875 */
7876 for (unsigned i = 0; i < pTb->cRanges; i++)
7877 {
7878 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
7879 + (pTb->aRanges[i].idxPhysPage == 0
7880 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
7881 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
7882 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
7883 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
7884 unsigned off = pTb->aRanges[i].offOpcodes;
7885 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
7886 while (off < cbOpcodes)
7887 {
7888 uint32_t cbInstr = 1;
7889 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
7890 &pTb->pabOpcodes[off], cbOpcodes - off,
7891 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
7892 if (RT_SUCCESS(rc))
7893 {
7894 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
7895 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
7896 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7897 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7898 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
7899 GCPhysPc += cbInstr;
7900 off += cbInstr;
7901 }
7902 else
7903 {
7904 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
7905 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
7906 break;
7907 }
7908 }
7909 }
7910
7911 /*
7912 * Then the native code:
7913 */
7914 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
7915 while (offNative < cNative)
7916 {
7917 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
7918# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
7919 uint32_t cbInstr = sizeof(paNative[0]);
7920 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
7921 if (RT_SUCCESS(rc))
7922 {
7923# if defined(RT_ARCH_AMD64)
7924 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
7925 {
7926 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
7927 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
7928 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
7929 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
7930 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
7931 uInfo & 0x8000 ? " - recompiled" : "");
7932 else
7933 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
7934 }
7935 else
7936# endif
7937 {
7938# ifdef RT_ARCH_AMD64
7939 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
7940 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
7941 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7942 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7943# elif defined(RT_ARCH_ARM64)
7944 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
7945 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
7946 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
7947# else
7948# error "Port me"
7949# endif
7950 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
7951 }
7952 }
7953 else
7954 {
7955# if defined(RT_ARCH_AMD64)
7956 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
7957 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
7958# else
7959 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
7960# endif
7961 cbInstr = sizeof(paNative[0]);
7962 }
7963 offNative += cbInstr / sizeof(paNative[0]);
7964
7965# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
7966 cs_insn *pInstr;
7967 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
7968 (uintptr_t)pNativeCur, 1, &pInstr);
7969 if (cInstrs > 0)
7970 {
7971 Assert(cInstrs == 1);
7972# if defined(RT_ARCH_AMD64)
7973 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
7974 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
7975# else
7976 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
7977 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
7978# endif
7979 offNative += pInstr->size / sizeof(*pNativeCur);
7980 cs_free(pInstr, cInstrs);
7981 }
7982 else
7983 {
7984# if defined(RT_ARCH_AMD64)
7985 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
7986                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
7987# else
7988 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
7989# endif
7990 offNative++;
7991 }
7992# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
7993 }
7994 }
7995
7996#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
7997 /* Cleanup. */
7998 cs_close(&hDisasm);
7999#endif
8000}
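/* Illustration (hedged, hypothetical output): with debug info available the listing
 * above interleaves guest and host code roughly like this:
 *     Range #0: GCPhysPc=0000000000123000 LB 0x6 [idxPg=0]
 *     %%0000000000123000: 89 d8                mov eax, ebx        ; <--- guest
 *     Call #0 to kIemThreadedFunc_... (2 args) - recompiled
 *     00007f80a0c01000: mov r11d, ebx
 * The exact columns, byte dumps and names depend on the host architecture and the
 * disassembler format flags used above. */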
8001
8002
8003/**
8004 * Recompiles the given threaded TB into a native one.
8005 *
8006 * In case of failure the translation block will be returned as-is.
8007 *
8008 * @returns pTb.
8009 * @param pVCpu The cross context virtual CPU structure of the calling
8010 * thread.
8011 * @param   pTb     The threaded translation block to recompile to native.
8012 */
8013DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
8014{
8015 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
8016
8017 /*
8018 * The first time thru, we allocate the recompiler state, the other times
8019 * we just need to reset it before using it again.
8020 */
8021 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
8022 if (RT_LIKELY(pReNative))
8023 iemNativeReInit(pReNative, pTb);
8024 else
8025 {
8026 pReNative = iemNativeInit(pVCpu, pTb);
8027 AssertReturn(pReNative, pTb);
8028 }
8029
8030 /*
8031 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
8032 * for aborting if an error happens.
8033 */
8034 uint32_t cCallsLeft = pTb->Thrd.cCalls;
8035#ifdef LOG_ENABLED
8036 uint32_t const cCallsOrg = cCallsLeft;
8037#endif
8038 uint32_t off = 0;
8039 int rc = VINF_SUCCESS;
8040 IEMNATIVE_TRY_SETJMP(pReNative, rc)
8041 {
8042 /*
8043 * Emit prolog code (fixed).
8044 */
8045 off = iemNativeEmitProlog(pReNative, off);
8046
8047 /*
8048 * Convert the calls to native code.
8049 */
8050#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8051 int32_t iGstInstr = -1;
8052#endif
8053#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
8054 uint32_t cThreadedCalls = 0;
8055 uint32_t cRecompiledCalls = 0;
8056#endif
8057 uint32_t fExec = pTb->fFlags;
8058 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
8059 while (cCallsLeft-- > 0)
8060 {
8061 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
8062
8063 /*
8064 * Debug info and assembly markup.
8065 */
8066 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
8067 fExec = pCallEntry->auParams[0];
8068#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8069 iemNativeDbgInfoAddNativeOffset(pReNative, off);
8070 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
8071 {
8072 if (iGstInstr < (int32_t)pTb->cInstructions)
8073 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
8074 else
8075 Assert(iGstInstr == pTb->cInstructions);
8076 iGstInstr = pCallEntry->idxInstr;
8077 }
8078 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
8079#endif
8080#if defined(VBOX_STRICT)
8081 off = iemNativeEmitMarker(pReNative, off,
8082 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
8083 pCallEntry->enmFunction));
8084#endif
8085#if defined(VBOX_STRICT)
8086 iemNativeRegAssertSanity(pReNative);
8087#endif
8088
8089 /*
8090 * Actual work.
8091 */
8092 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
8093 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "" : "(todo)"));
8094 if (pfnRecom) /** @todo stats on this. */
8095 {
8096 off = pfnRecom(pReNative, off, pCallEntry);
8097 STAM_REL_STATS({cRecompiledCalls++;});
8098 }
8099 else
8100 {
8101 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
8102 STAM_REL_STATS({cThreadedCalls++;});
8103 }
8104 Assert(off <= pReNative->cInstrBufAlloc);
8105 Assert(pReNative->cCondDepth == 0);
8106
8107 /*
8108 * Advance.
8109 */
8110 pCallEntry++;
8111 }
8112
8113 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
8114 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
8115 if (!cThreadedCalls)
8116 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
8117
8118 /*
8119 * Emit the epilog code.
8120 */
8121 uint32_t idxReturnLabel;
8122 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
8123
8124 /*
8125 * Generate special jump labels.
8126 */
8127 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
8128 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
8129 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
8130 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
8131 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
8132 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
8133 }
8134 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
8135 {
8136 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
8137 return pTb;
8138 }
8139 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
8140 Assert(off <= pReNative->cInstrBufAlloc);
8141
8142 /*
8143     * Make sure all labels have been defined.
8144 */
8145 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
8146#ifdef VBOX_STRICT
8147 uint32_t const cLabels = pReNative->cLabels;
8148 for (uint32_t i = 0; i < cLabels; i++)
8149 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
8150#endif
8151
8152 /*
8153 * Allocate executable memory, copy over the code we've generated.
8154 */
8155 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
8156 if (pTbAllocator->pDelayedFreeHead)
8157 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
8158
8159 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
8160 AssertReturn(paFinalInstrBuf, pTb);
8161 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
8162
8163 /*
8164 * Apply fixups.
8165 */
8166 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
8167 uint32_t const cFixups = pReNative->cFixups;
8168 for (uint32_t i = 0; i < cFixups; i++)
8169 {
8170 Assert(paFixups[i].off < off);
8171 Assert(paFixups[i].idxLabel < cLabels);
8172 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
8173 switch (paFixups[i].enmType)
8174 {
8175#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
8176 case kIemNativeFixupType_Rel32:
8177 Assert(paFixups[i].off + 4 <= off);
8178 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8179 continue;
8180
8181#elif defined(RT_ARCH_ARM64)
8182 case kIemNativeFixupType_RelImm26At0:
8183 {
8184 Assert(paFixups[i].off < off);
8185 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8186 Assert(offDisp >= -262144 && offDisp < 262144);
8187 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
8188 continue;
8189 }
8190
8191 case kIemNativeFixupType_RelImm19At5:
8192 {
8193 Assert(paFixups[i].off < off);
8194 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8195 Assert(offDisp >= -262144 && offDisp < 262144);
8196 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
8197 continue;
8198 }
8199
8200 case kIemNativeFixupType_RelImm14At5:
8201 {
8202 Assert(paFixups[i].off < off);
8203 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
8204 Assert(offDisp >= -8192 && offDisp < 8192);
8205 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
8206 continue;
8207 }
8208
8209#endif
8210 case kIemNativeFixupType_Invalid:
8211 case kIemNativeFixupType_End:
8212 break;
8213 }
8214 AssertFailed();
8215 }
8216
8217 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
8218 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
8219
8220 /*
8221 * Convert the translation block.
8222 */
8223 RTMemFree(pTb->Thrd.paCalls);
8224 pTb->Native.paInstructions = paFinalInstrBuf;
8225 pTb->Native.cInstructions = off;
8226 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
8227#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8228 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
8229 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
8230#endif
8231
8232 Assert(pTbAllocator->cThreadedTbs > 0);
8233 pTbAllocator->cThreadedTbs -= 1;
8234 pTbAllocator->cNativeTbs += 1;
8235 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
8236
8237#ifdef LOG_ENABLED
8238 /*
8239 * Disassemble to the log if enabled.
8240 */
8241 if (LogIs3Enabled())
8242 {
8243 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
8244 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
8245# ifdef DEBUG_bird
8246 RTLogFlush(NULL);
8247# endif
8248 }
8249#endif
8250
8251 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
8252 return pTb;
8253}
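/* Illustration (hedged worked example, hypothetical offsets): on AMD64 a
   kIemNativeFixupType_Rel32 entry patches the 32-bit displacement of a rel32
   jump/call.  Assuming the label got defined at byte offset 0x120, the fixup was
   recorded at offset 0x100 (the displacement field itself) and offAddend is -4, the
   loop above stores 0x120 - 0x100 - 4 = 0x1C, i.e. the distance from the end of the
   4-byte displacement field to the label. */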
8254