
source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@101850

Last change on this file: r101850, checked in by vboxsync, 15 months ago

VMM/IEM: Replaced all IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF use with IEM_MC_CLEAR_HIGH_GREG_U64 and removed the MC, as it forced argument variables to be used after IEM_MC_CALL_XXXX which made recompiling more complicated. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 101850 2023-11-06 10:13:31Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94
95#include "IEMInline.h"
96#include "IEMThreadedFunctions.h"
97#include "IEMN8veRecompiler.h"
98#include "IEMNativeFunctions.h"
99
100
101/*
102 * Narrow down configs here to avoid wasting time on unused configs.
103 * Note! Same checks in IEMAllThrdRecompiler.cpp.
104 */
105
106#ifndef IEM_WITH_CODE_TLB
107# error The code TLB must be enabled for the recompiler.
108#endif
109
110#ifndef IEM_WITH_DATA_TLB
111# error The data TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_SETJMP
115# error The setjmp approach must be enabled for the recompiler.
116#endif
117
118
119/*********************************************************************************************************************************
120* Defined Constants And Macros *
121*********************************************************************************************************************************/
122/** Always count instructions for now. */
123#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
124
125
126/*********************************************************************************************************************************
127* Internal Functions *
128*********************************************************************************************************************************/
129#ifdef VBOX_STRICT
130static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
131 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
132#endif
133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
134static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
135static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
136#endif
137
138
139/*********************************************************************************************************************************
140* Executable Memory Allocator *
141*********************************************************************************************************************************/
142/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
143 * Use an alternative chunk sub-allocator that does not store internal data
144 * in the chunk.
145 *
146 * Using the RTHeapSimple is not practical on newer darwin systems where
147 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
148 * memory. We would have to change the protection of the whole chunk for
149 * every call to RTHeapSimple, which would be rather expensive.
150 *
151 * This alternative implementation restricts page protection modifications
152 * to the pages backing the executable memory we just allocated.
153 */
154#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155/** The chunk sub-allocation unit size in bytes. */
156#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
157/** The chunk sub-allocation unit size as a shift factor. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
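
/* Illustrative addition (not part of the original source): the unit size and the
 * shift factor must describe the same granularity, and requests are rounded up to
 * whole units, e.g. a 200 byte request occupies two 128 byte units (256 bytes). */
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));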
159
160#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
161# ifdef IEMNATIVE_USE_GDB_JIT
162# define IEMNATIVE_USE_GDB_JIT_ET_DYN
163
164/** GDB JIT: Code entry. */
165typedef struct GDBJITCODEENTRY
166{
167 struct GDBJITCODEENTRY *pNext;
168 struct GDBJITCODEENTRY *pPrev;
169 uint8_t *pbSymFile;
170 uint64_t cbSymFile;
171} GDBJITCODEENTRY;
172
173/** GDB JIT: Actions. */
174typedef enum GDBJITACTIONS : uint32_t
175{
176 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
177} GDBJITACTIONS;
178
179/** GDB JIT: Descriptor. */
180typedef struct GDBJITDESCRIPTOR
181{
182 uint32_t uVersion;
183 GDBJITACTIONS enmAction;
184 GDBJITCODEENTRY *pRelevant;
185 GDBJITCODEENTRY *pHead;
186 /** Our addition: */
187 GDBJITCODEENTRY *pTail;
188} GDBJITDESCRIPTOR;
189
190/** GDB JIT: Our simple symbol file data. */
191typedef struct GDBJITSYMFILE
192{
193 Elf64_Ehdr EHdr;
194# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
195 Elf64_Shdr aShdrs[5];
196# else
197 Elf64_Shdr aShdrs[7];
198 Elf64_Phdr aPhdrs[2];
199# endif
200 /** The dwarf ehframe data for the chunk. */
201 uint8_t abEhFrame[512];
202 char szzStrTab[128];
203 Elf64_Sym aSymbols[3];
204# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
205 Elf64_Sym aDynSyms[2];
206 Elf64_Dyn aDyn[6];
207# endif
208} GDBJITSYMFILE;
209
210extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
211extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
212
213/** Init once for g_IemNativeGdbJitLock. */
214static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
215/** Init once for the critical section. */
216static RTCRITSECT g_IemNativeGdbJitLock;
217
218/** GDB reads the info here. */
219GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
220
221/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
222DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
223{
224 ASMNopPause();
225}
226
227/** @callback_method_impl{FNRTONCE} */
228static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
229{
230 RT_NOREF(pvUser);
231 return RTCritSectInit(&g_IemNativeGdbJitLock);
232}
233
234
235# endif /* IEMNATIVE_USE_GDB_JIT */
236
237/**
238 * Per-chunk unwind info for non-windows hosts.
239 */
240typedef struct IEMEXECMEMCHUNKEHFRAME
241{
242# ifdef IEMNATIVE_USE_LIBUNWIND
243 /** The offset of the FDA into abEhFrame. */
244 uintptr_t offFda;
245# else
246 /** 'struct object' storage area. */
247 uint8_t abObject[1024];
248# endif
249# ifdef IEMNATIVE_USE_GDB_JIT
250# if 0
251 /** The GDB JIT 'symbol file' data. */
252 GDBJITSYMFILE GdbJitSymFile;
253# endif
254 /** The GDB JIT list entry. */
255 GDBJITCODEENTRY GdbJitEntry;
256# endif
257 /** The dwarf ehframe data for the chunk. */
258 uint8_t abEhFrame[512];
259} IEMEXECMEMCHUNKEHFRAME;
260/** Pointer to per-chunk unwind info for non-windows hosts. */
261typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
262#endif
263
264
265/**
266 * A chunk of executable memory.
267 */
268typedef struct IEMEXECMEMCHUNK
269{
270#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
271 /** Number of free items in this chunk. */
272 uint32_t cFreeUnits;
273 /** Hint where to start searching for free space in the allocation bitmap. */
274 uint32_t idxFreeHint;
275#else
276 /** The heap handle. */
277 RTHEAPSIMPLE hHeap;
278#endif
279 /** Pointer to the chunk. */
280 void *pvChunk;
281#ifdef IN_RING3
282 /**
283 * Pointer to the unwind information.
284 *
285 * This is used during C++ throw and longjmp (windows and probably most other
286 * platforms). Some debuggers (windbg) make use of it as well.
287 *
288 * Windows: This is allocated from hHeap on windows because (at least for
289 * AMD64) the UNWIND_INFO structure address in the
290 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
291 *
292 * Others: Allocated from the regular heap to avoid unnecessary executable data
293 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
294 void *pvUnwindInfo;
295#elif defined(IN_RING0)
296 /** Allocation handle. */
297 RTR0MEMOBJ hMemObj;
298#endif
299} IEMEXECMEMCHUNK;
300/** Pointer to a memory chunk. */
301typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
302
303
304/**
305 * Executable memory allocator for the native recompiler.
306 */
307typedef struct IEMEXECMEMALLOCATOR
308{
309 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
310 uint32_t uMagic;
311
312 /** The chunk size. */
313 uint32_t cbChunk;
314 /** The maximum number of chunks. */
315 uint32_t cMaxChunks;
316 /** The current number of chunks. */
317 uint32_t cChunks;
318 /** Hint where to start looking for available memory. */
319 uint32_t idxChunkHint;
320 /** Statistics: Current number of allocations. */
321 uint32_t cAllocations;
322
323 /** The total amount of memory available. */
324 uint64_t cbTotal;
325 /** Total amount of free memory. */
326 uint64_t cbFree;
327 /** Total amount of memory allocated. */
328 uint64_t cbAllocated;
329
330#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
331 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
332 *
333 * Since the chunk size is a power of two and the minimum chunk size is a lot
334 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
335 * require a whole number of uint64_t elements in the allocation bitmap. So,
336 * for the sake of simplicity (and laziness), they are allocated as one
337 * contiguous chunk. */
338 uint64_t *pbmAlloc;
339 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
340 uint32_t cUnitsPerChunk;
341 /** Number of bitmap elements per chunk (for quickly locating the bitmap
342 * portion corresponding to a chunk). */
343 uint32_t cBitmapElementsPerChunk;
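    /* Worked example (illustrative, not from the original source): with a 64 MiB chunk
     * and 128 byte units, cUnitsPerChunk is 64M / 128 = 524288 and
     * cBitmapElementsPerChunk is 524288 / 64 = 8192 uint64_t words. */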
344#else
345 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
346 * @{ */
347 /** The size of the heap internal block header. This is used to adjust the
348 * requested memory size to make sure there is exactly enough room for a header at
349 * the end of the blocks we allocate before the next 64 byte alignment line. */
350 uint32_t cbHeapBlockHdr;
351 /** The size of the initial heap allocation required to make sure the first
352 * allocation is correctly aligned. */
353 uint32_t cbHeapAlignTweak;
354 /** The alignment tweak allocation address. */
355 void *pvAlignTweak;
356 /** @} */
357#endif
358
359#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
360 /** Pointer to the array of unwind info running parallel to aChunks (same
361 * allocation as this structure, located after the bitmaps).
362 * (For Windows, the structures must reside in 32-bit RVA distance to the
363 * actual chunk, so they are allocated off the chunk.) */
364 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
365#endif
366
367 /** The allocation chunks. */
368 RT_FLEXIBLE_ARRAY_EXTENSION
369 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
370} IEMEXECMEMALLOCATOR;
371/** Pointer to an executable memory allocator. */
372typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
373
374/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
375#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
376
377
378static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
379
380
381/**
382 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
383 * the heap statistics.
384 */
385static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
386 uint32_t cbReq, uint32_t idxChunk)
387{
388 pExecMemAllocator->cAllocations += 1;
389 pExecMemAllocator->cbAllocated += cbReq;
390#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
391 pExecMemAllocator->cbFree -= cbReq;
392#else
393 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
394#endif
395 pExecMemAllocator->idxChunkHint = idxChunk;
396
397#ifdef RT_OS_DARWIN
398 /*
399 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
400 * on darwin. So, we mark the pages returned as read+write after alloc and
401 * expect the caller to call iemExecMemAllocatorReadyForUse when done
402 * writing to the allocation.
403 *
404 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
405 * for details.
406 */
407 /** @todo detect if this is necessary... it wasn't required on 10.15 or
408 * whatever older version it was. */
409 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
410 AssertRC(rc);
411#endif
412
413 return pvRet;
414}
415
416
417#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
418static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
419 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
420{
421 /*
422 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
423 */
424 Assert(!(cToScan & 63));
425 Assert(!(idxFirst & 63));
426 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
427 pbmAlloc += idxFirst / 64;
428
429 /*
430 * Scan the bitmap for cReqUnits of consecutive clear bits
431 */
432 /** @todo This can probably be done more efficiently for non-x86 systems. */
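    /* Worked example (illustrative, not from the original source): with cReqUnits = 3
     * and a bitmap whose low byte is 0b11100011, ASMBitFirstClear returns 2, the inner
     * loop below finds bits 3 and 4 clear as well, so bits 2 thru 4 get set and the
     * allocation starts at pvChunk + ((idxFirst + 2) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */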
433 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
434 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
435 {
436 uint32_t idxAddBit = 1;
437 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
438 idxAddBit++;
439 if (idxAddBit >= cReqUnits)
440 {
441 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
442
443 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
444 pChunk->cFreeUnits -= cReqUnits;
445 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
446
447 void * const pvRet = (uint8_t *)pChunk->pvChunk
448 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
449
450 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
451 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
452 }
453
454 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
455 }
456 return NULL;
457}
458#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
459
460
461static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
462{
463#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
464 /*
465 * Figure out how much to allocate.
466 */
467 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
468 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
469 {
470 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
471 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
472 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
473 {
474 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
475 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
476 if (pvRet)
477 return pvRet;
478 }
479 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
480 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
481 cReqUnits, idxChunk);
482 }
483#else
484 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
485 if (pvRet)
486 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
487#endif
488 return NULL;
489
490}
491
492
493/**
494 * Allocates @a cbReq bytes of executable memory.
495 *
496 * @returns Pointer to the memory, NULL if out of memory or other problem
497 * encountered.
498 * @param pVCpu The cross context virtual CPU structure of the calling
499 * thread.
500 * @param cbReq How many bytes are required.
501 */
502static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
503{
504 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
505 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
506 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
507
508 /*
509 * Adjust the request size so it'll fit the allocator alignment/whatnot.
510 *
511 * For the RTHeapSimple allocator this means to follow the logic described
512 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
513 * existing chunks if we think we've got sufficient free memory around.
514 *
515 * While for the alternative one we just align it up to a whole unit size.
516 */
517#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
518 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
519#else
520 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
521#endif
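    /* Worked example (illustrative, not from the original source): a 200 byte request
     * becomes 256 bytes with the alternative sub-allocator (two 128 byte units); with
     * RTHeapSimple and a 32 byte block header it becomes RT_ALIGN_32(200 + 32, 64) - 32
     * = 224 bytes, leaving exactly room for the next block header before the following
     * 64 byte alignment line. */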
522 if (cbReq <= pExecMemAllocator->cbFree)
523 {
524 uint32_t const cChunks = pExecMemAllocator->cChunks;
525 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
526 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
527 {
528 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
529 if (pvRet)
530 return pvRet;
531 }
532 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
533 {
534 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
535 if (pvRet)
536 return pvRet;
537 }
538 }
539
540 /*
541 * Can we grow it with another chunk?
542 */
543 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
544 {
545 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
546 AssertLogRelRCReturn(rc, NULL);
547
548 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 AssertFailed();
553 }
554
555 /* What now? Prune native translation blocks from the cache? */
556 AssertFailed();
557 return NULL;
558}
559
560
561/** This is a hook that we may need later for changing memory protection back
562 * to readonly+exec */
563static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
564{
565#ifdef RT_OS_DARWIN
566 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
567 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
568 AssertRC(rc); RT_NOREF(pVCpu);
569
570 /*
571 * Flush the instruction cache:
572 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
573 */
574 /* sys_dcache_flush(pv, cb); - not necessary */
575 sys_icache_invalidate(pv, cb);
576#else
577 RT_NOREF(pVCpu, pv, cb);
578#endif
579}
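
#if 0
/* Illustrative usage sketch (not part of the original source; pbSrcCode and cbCode are
 * hypothetical).  On darwin an allocation is returned read+write rather than executable,
 * so the caller is expected to pair it with iemExecMemAllocatorReadyForUse once it has
 * finished emitting the native code: */
static void iemExecMemAllocatorUsageSketch(PVMCPUCC pVCpu, uint8_t const *pbSrcCode, size_t cbCode)
{
    void *pvDst = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbCode);  /* RW (not X) on darwin */
    if (pvDst)
    {
        memcpy(pvDst, pbSrcCode, cbCode);                             /* emit/copy the native code */
        iemExecMemAllocatorReadyForUse(pVCpu, pvDst, cbCode);         /* flip to R+X and flush the icache */
    }
}
#endif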
580
581
582/**
583 * Frees executable memory.
584 */
585void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
586{
587 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
588 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
589 Assert(pv);
590#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
591 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
592#else
593 Assert(!((uintptr_t)pv & 63));
594#endif
595
596 /* Align the size as we did when allocating the block. */
597#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
598 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
599#else
600 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
601#endif
602
603 /* Free it / assert sanity. */
604#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
605 uint32_t const cChunks = pExecMemAllocator->cChunks;
606 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
607 bool fFound = false;
608 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
609 {
610 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
611 fFound = offChunk < cbChunk;
612 if (fFound)
613 {
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
616 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
617
618 /* Check that it's valid and free it. */
619 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
620 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
621 for (uint32_t i = 1; i < cReqUnits; i++)
622 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
623 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
624
625 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
626 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
627
628 /* Update the stats. */
629 pExecMemAllocator->cbAllocated -= cb;
630 pExecMemAllocator->cbFree += cb;
631 pExecMemAllocator->cAllocations -= 1;
632 return;
633#else
634 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
635 break;
636#endif
637 }
638 }
639# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
640 AssertFailed();
641# else
642 Assert(fFound);
643# endif
644#endif
645
646#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
647 /* Update stats while cb is freshly calculated. */
648 pExecMemAllocator->cbAllocated -= cb;
649 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
650 pExecMemAllocator->cAllocations -= 1;
651
652 /* Free it. */
653 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
654#endif
655}
656
657
658
659#ifdef IN_RING3
660# ifdef RT_OS_WINDOWS
661
662/**
663 * Initializes the unwind info structures for windows hosts.
664 */
665static int
666iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
667 void *pvChunk, uint32_t idxChunk)
668{
669 RT_NOREF(pVCpu);
670
671 /*
672 * The AMD64 unwind opcodes.
673 *
674 * This is a program that starts with RSP after a RET instruction that
675 * ends up in recompiled code, and the operations we describe here will
676 * restore all non-volatile registers and bring RSP back to where our
677 * RET address is. This means it's reverse order from what happens in
678 * the prologue.
679 *
680 * Note! Using a frame register approach here, both because we have one
681 * and mainly because the UWOP_ALLOC_LARGE argument values
682 * would be a pain to write initializers for. On the positive
683 * side, we're impervious to changes in the stack variable
684 * area and can deal with dynamic stack allocations if necessary.
685 */
686 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
687 {
688 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
689 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
690 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
691 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
692 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
693 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
694 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
695 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
696 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
697 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
698 };
699 union
700 {
701 IMAGE_UNWIND_INFO Info;
702 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
703 } s_UnwindInfo =
704 {
705 {
706 /* .Version = */ 1,
707 /* .Flags = */ 0,
708 /* .SizeOfProlog = */ 16, /* whatever */
709 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
710 /* .FrameRegister = */ X86_GREG_xBP,
711 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
712 }
713 };
714 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
715 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
716
717 /*
718 * Calc how much space we need and allocate it off the exec heap.
719 */
720 unsigned const cFunctionEntries = 1;
721 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
722 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
723# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
724 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
725 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
726 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
727# else
728 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
729 - pExecMemAllocator->cbHeapBlockHdr;
730 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
731 32 /*cbAlignment*/);
732# endif
733 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
734 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
735
736 /*
737 * Initialize the structures.
738 */
739 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
740
741 paFunctions[0].BeginAddress = 0;
742 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
743 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
744
745 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
746 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
747
748 /*
749 * Register it.
750 */
751 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
752 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
753
754 return VINF_SUCCESS;
755}
756
757
758# else /* !RT_OS_WINDOWS */
759
760/**
761 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
762 */
763DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
764{
765 if (iValue >= 64)
766 {
767 Assert(iValue < 0x2000);
768 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
769 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
770 }
771 else if (iValue >= 0)
772 *Ptr.pb++ = (uint8_t)iValue;
773 else if (iValue > -64)
774 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
775 else
776 {
777 Assert(iValue > -0x2000);
778 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
779 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
780 }
781 return Ptr;
782}
783
784
785/**
786 * Emits an ULEB128 encoded value (up to 64-bit wide).
787 */
788DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
789{
790 while (uValue >= 0x80)
791 {
792 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
793 uValue >>= 7;
794 }
795 *Ptr.pb++ = (uint8_t)uValue;
796 return Ptr;
797}
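
/* Illustrative encodings (not part of the original source):
 *   iemDwarfPutUleb128(Ptr, 624485) emits the bytes 0xe5 0x8e 0x26, while
 *   iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78 (the low six bits of -8
 *   with bit 6 set to mark the value as negative). */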
798
799
800/**
801 * Emits a CFA rule as register @a uReg + offset @a off.
802 */
803DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
804{
805 *Ptr.pb++ = DW_CFA_def_cfa;
806 Ptr = iemDwarfPutUleb128(Ptr, uReg);
807 Ptr = iemDwarfPutUleb128(Ptr, off);
808 return Ptr;
809}
810
811
812/**
813 * Emits a register (@a uReg) save location:
814 * CFA + @a off * data_alignment_factor
815 */
816DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
817{
818 if (uReg < 0x40)
819 *Ptr.pb++ = DW_CFA_offset | uReg;
820 else
821 {
822 *Ptr.pb++ = DW_CFA_offset_extended;
823 Ptr = iemDwarfPutUleb128(Ptr, uReg);
824 }
825 Ptr = iemDwarfPutUleb128(Ptr, off);
826 return Ptr;
827}
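
/* Illustrative encoding (not part of the original source, assuming DWREG_AMD64_RBP is
 * DWARF register 6): iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits 0x86
 * (DW_CFA_offset | 6) followed by ULEB128 2, which with the data alignment factor of -8
 * used below means "RBP was saved at CFA + 2 * -8 = CFA - 16". */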
828
829
830# if 0 /* unused */
831/**
832 * Emits a register (@a uReg) save location, using signed offset:
833 * CFA + @a offSigned * data_alignment_factor
834 */
835DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
836{
837 *Ptr.pb++ = DW_CFA_offset_extended_sf;
838 Ptr = iemDwarfPutUleb128(Ptr, uReg);
839 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
840 return Ptr;
841}
842# endif
843
844
845/**
846 * Initializes the unwind info section for non-windows hosts.
847 */
848static int
849iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
850 void *pvChunk, uint32_t idxChunk)
851{
852 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
853 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
854
855 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
856
857 /*
858 * Generate the CIE first.
859 */
860# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
861 uint8_t const iDwarfVer = 3;
862# else
863 uint8_t const iDwarfVer = 4;
864# endif
865 RTPTRUNION const PtrCie = Ptr;
866 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
867 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
868 *Ptr.pb++ = iDwarfVer; /* DwARF version */
869 *Ptr.pb++ = 0; /* Augmentation. */
870 if (iDwarfVer >= 4)
871 {
872 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
873 *Ptr.pb++ = 0; /* Segment selector size. */
874 }
875# ifdef RT_ARCH_AMD64
876 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
877# else
878 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
879# endif
880 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
881# ifdef RT_ARCH_AMD64
882 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
883# elif defined(RT_ARCH_ARM64)
884 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
885# else
886# error "port me"
887# endif
888 /* Initial instructions: */
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
891 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
892 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
893 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
894 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
895 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
896 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
897 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
898# elif defined(RT_ARCH_ARM64)
899# if 1
900 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
901# else
902 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
903# endif
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
916 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
917 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
918# else
919# error "port me"
920# endif
921 while ((Ptr.u - PtrCie.u) & 3)
922 *Ptr.pb++ = DW_CFA_nop;
923 /* Finalize the CIE size. */
924 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
925
926 /*
927 * Generate an FDE for the whole chunk area.
928 */
929# ifdef IEMNATIVE_USE_LIBUNWIND
930 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
931# endif
932 RTPTRUNION const PtrFde = Ptr;
933 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
934 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
935 Ptr.pu32++;
936 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
937 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
938# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
939 *Ptr.pb++ = DW_CFA_nop;
940# endif
941 while ((Ptr.u - PtrFde.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the FDE size. */
944 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
945
946 /* Terminator entry. */
947 *Ptr.pu32++ = 0;
948 *Ptr.pu32++ = 0; /* just to be sure... */
949 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
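    /* Resulting abEhFrame layout (illustrative summary, not part of the original source):
     *   [CIE: length | id 0 | version | augmentation | alignment factors | RA column | initial CFA rules | padding]
     *   [FDE: length | back-offset to the CIE | chunk start address | chunk size | padding]
     *   [terminator: two zero words] */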
950
951 /*
952 * Register it.
953 */
954# ifdef IEMNATIVE_USE_LIBUNWIND
955 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
956# else
957 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
958 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
959# endif
960
961# ifdef IEMNATIVE_USE_GDB_JIT
962 /*
963 * Now for telling GDB about this (experimental).
964 *
965 * This seems to work best with ET_DYN.
966 */
967 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
968# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
969 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
970 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
971# else
972 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
973 - pExecMemAllocator->cbHeapBlockHdr;
974 GDBJITSYMFILE * const pSymFile = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
975# endif
976 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
977 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
978
979 RT_ZERO(*pSymFile);
980
981 /*
982 * The ELF header:
983 */
984 pSymFile->EHdr.e_ident[0] = ELFMAG0;
985 pSymFile->EHdr.e_ident[1] = ELFMAG1;
986 pSymFile->EHdr.e_ident[2] = ELFMAG2;
987 pSymFile->EHdr.e_ident[3] = ELFMAG3;
988 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
989 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
990 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
991 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
992# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
993 pSymFile->EHdr.e_type = ET_DYN;
994# else
995 pSymFile->EHdr.e_type = ET_REL;
996# endif
997# ifdef RT_ARCH_AMD64
998 pSymFile->EHdr.e_machine = EM_AMD64;
999# elif defined(RT_ARCH_ARM64)
1000 pSymFile->EHdr.e_machine = EM_AARCH64;
1001# else
1002# error "port me"
1003# endif
1004 pSymFile->EHdr.e_version = 1; /*?*/
1005 pSymFile->EHdr.e_entry = 0;
1006# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1007 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1008# else
1009 pSymFile->EHdr.e_phoff = 0;
1010# endif
1011 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1012 pSymFile->EHdr.e_flags = 0;
1013 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1014# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1015 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1016 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1017# else
1018 pSymFile->EHdr.e_phentsize = 0;
1019 pSymFile->EHdr.e_phnum = 0;
1020# endif
1021 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1022 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1023 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1024
1025 uint32_t offStrTab = 0;
1026#define APPEND_STR(a_szStr) do { \
1027 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1028 offStrTab += sizeof(a_szStr); \
1029 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1030 } while (0)
1031#define APPEND_STR_FMT(a_szStr, ...) do { \
1032 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1033 offStrTab++; \
1034 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1035 } while (0)
1036
1037 /*
1038 * Section headers.
1039 */
1040 /* Section header #0: NULL */
1041 unsigned i = 0;
1042 APPEND_STR("");
1043 RT_ZERO(pSymFile->aShdrs[i]);
1044 i++;
1045
1046 /* Section header: .eh_frame */
1047 pSymFile->aShdrs[i].sh_name = offStrTab;
1048 APPEND_STR(".eh_frame");
1049 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1050 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1051# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1052 pSymFile->aShdrs[i].sh_offset
1053 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1054# else
1055 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1056 pSymFile->aShdrs[i].sh_offset = 0;
1057# endif
1058
1059 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1060 pSymFile->aShdrs[i].sh_link = 0;
1061 pSymFile->aShdrs[i].sh_info = 0;
1062 pSymFile->aShdrs[i].sh_addralign = 1;
1063 pSymFile->aShdrs[i].sh_entsize = 0;
1064 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1065 i++;
1066
1067 /* Section header: .shstrtab */
1068 unsigned const iShStrTab = i;
1069 pSymFile->EHdr.e_shstrndx = iShStrTab;
1070 pSymFile->aShdrs[i].sh_name = offStrTab;
1071 APPEND_STR(".shstrtab");
1072 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1073 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1074# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1075 pSymFile->aShdrs[i].sh_offset
1076 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1077# else
1078 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1079 pSymFile->aShdrs[i].sh_offset = 0;
1080# endif
1081 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1082 pSymFile->aShdrs[i].sh_link = 0;
1083 pSymFile->aShdrs[i].sh_info = 0;
1084 pSymFile->aShdrs[i].sh_addralign = 1;
1085 pSymFile->aShdrs[i].sh_entsize = 0;
1086 i++;
1087
1088 /* Section header: .symtab */
1089 pSymFile->aShdrs[i].sh_name = offStrTab;
1090 APPEND_STR(".symtab");
1091 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1092 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1093 pSymFile->aShdrs[i].sh_offset
1094 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1096 pSymFile->aShdrs[i].sh_link = iShStrTab;
1097 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1098 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1099 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1100 i++;
1101
1102# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1103 /* Section header: .dynsym */
1104 pSymFile->aShdrs[i].sh_name = offStrTab;
1105 APPEND_STR(".dynsym");
1106 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1107 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1108 pSymFile->aShdrs[i].sh_offset
1109 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1110 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1111 pSymFile->aShdrs[i].sh_link = iShStrTab;
1112 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1113 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1114 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1115 i++;
1116# endif
1117
1118# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1119 /* Section header: .dynamic */
1120 pSymFile->aShdrs[i].sh_name = offStrTab;
1121 APPEND_STR(".dynamic");
1122 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1123 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1124 pSymFile->aShdrs[i].sh_offset
1125 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1126 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1127 pSymFile->aShdrs[i].sh_link = iShStrTab;
1128 pSymFile->aShdrs[i].sh_info = 0;
1129 pSymFile->aShdrs[i].sh_addralign = 1;
1130 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1131 i++;
1132# endif
1133
1134 /* Section header: .text */
1135 unsigned const iShText = i;
1136 pSymFile->aShdrs[i].sh_name = offStrTab;
1137 APPEND_STR(".text");
1138 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1139 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1140# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1141 pSymFile->aShdrs[i].sh_offset
1142 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1143# else
1144 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1145 pSymFile->aShdrs[i].sh_offset = 0;
1146# endif
1147 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1148 pSymFile->aShdrs[i].sh_link = 0;
1149 pSymFile->aShdrs[i].sh_info = 0;
1150 pSymFile->aShdrs[i].sh_addralign = 1;
1151 pSymFile->aShdrs[i].sh_entsize = 0;
1152 i++;
1153
1154 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1155
1156# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1157 /*
1158 * The program headers:
1159 */
1160 /* Everything in a single LOAD segment: */
1161 i = 0;
1162 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1163 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1164 pSymFile->aPhdrs[i].p_offset
1165 = pSymFile->aPhdrs[i].p_vaddr
1166 = pSymFile->aPhdrs[i].p_paddr = 0;
1167 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1168 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1169 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1170 i++;
1171 /* The .dynamic segment. */
1172 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1173 pSymFile->aPhdrs[i].p_flags = PF_R;
1174 pSymFile->aPhdrs[i].p_offset
1175 = pSymFile->aPhdrs[i].p_vaddr
1176 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1177 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1178 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1179 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1180 i++;
1181
1182 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1183
1184 /*
1185 * The dynamic section:
1186 */
1187 i = 0;
1188 pSymFile->aDyn[i].d_tag = DT_SONAME;
1189 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1190 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1191 i++;
1192 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1193 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1194 i++;
1195 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1196 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1197 i++;
1198 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1199 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1200 i++;
1201 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1202 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_NULL;
1205 i++;
1206 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1207# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1208
1209 /*
1210 * Symbol tables:
1211 */
1212 /** @todo gdb doesn't seem to really like this ... */
1213 i = 0;
1214 pSymFile->aSymbols[i].st_name = 0;
1215 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1216 pSymFile->aSymbols[i].st_value = 0;
1217 pSymFile->aSymbols[i].st_size = 0;
1218 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1219 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1220# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1221 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1222# endif
1223 i++;
1224
1225 pSymFile->aSymbols[i].st_name = 0;
1226 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1227 pSymFile->aSymbols[i].st_value = 0;
1228 pSymFile->aSymbols[i].st_size = 0;
1229 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1230 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1231 i++;
1232
1233 pSymFile->aSymbols[i].st_name = offStrTab;
1234 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1235# if 0
1236 pSymFile->aSymbols[i].st_shndx = iShText;
1237 pSymFile->aSymbols[i].st_value = 0;
1238# else
1239 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1240 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1241# endif
1242 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1246 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1247 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1248# endif
1249 i++;
1250
1251 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1252 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1253
1254 /*
1255 * The GDB JIT entry and informing GDB.
1256 */
1257 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1258# if 1
1259 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1260# else
1261 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1262# endif
1263
1264 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1265 RTCritSectEnter(&g_IemNativeGdbJitLock);
1266 pEhFrame->GdbJitEntry.pNext = NULL;
1267 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1268 if (__jit_debug_descriptor.pTail)
1269 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1270 else
1271 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1272 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1273 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1274
1275 /* Notify GDB: */
1276 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1277 __jit_debug_register_code();
1278 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1279 RTCritSectLeave(&g_IemNativeGdbJitLock);
1280
1281# else /* !IEMNATIVE_USE_GDB_JIT */
1282 RT_NOREF(pVCpu);
1283# endif /* !IEMNATIVE_USE_GDB_JIT */
1284
1285 return VINF_SUCCESS;
1286}
1287
1288# endif /* !RT_OS_WINDOWS */
1289#endif /* IN_RING3 */
1290
1291
1292/**
1293 * Adds another chunk to the executable memory allocator.
1294 *
1295 * This is used by the init code for the initial allocation and later by the
1296 * regular allocator function when it's out of memory.
1297 */
1298static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1299{
1300 /* Check that we've room for growth. */
1301 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1302 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1303
1304 /* Allocate a chunk. */
1305#ifdef RT_OS_DARWIN
1306 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1307#else
1308 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1309#endif
1310 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1311
1312#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1313 int rc = VINF_SUCCESS;
1314#else
1315 /* Initialize the heap for the chunk. */
1316 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1317 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1318 AssertRC(rc);
1319 if (RT_SUCCESS(rc))
1320 {
1321 /*
1322 * We want the memory to be aligned on 64 byte, so the first time thru
1323 * here we do some exploratory allocations to see how we can achieve this.
1324 * On subsequent runs we only make an initial adjustment allocation, if
1325 * necessary.
1326 *
1327 * Since we own the heap implementation, we know that the internal block
1328 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1329 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1330 * to the size, align up by 64 bytes, and subtract 32 bytes.
1331 *
1332 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1333 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1334 * allocation to force subsequent allocations to return 64 byte aligned
1335 * user areas.
1336 */
1337 if (!pExecMemAllocator->cbHeapBlockHdr)
1338 {
1339 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1340 pExecMemAllocator->cbHeapAlignTweak = 64;
1341 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1342 32 /*cbAlignment*/);
1343 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1344
1345 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1346 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1347 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1348 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1349 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1350
1351 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1352 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1353 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1354 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1355 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1356
1357 RTHeapSimpleFree(hHeap, pvTest2);
1358 RTHeapSimpleFree(hHeap, pvTest1);
1359 }
1360 else
1361 {
1362 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1364 }
1365 if (RT_SUCCESS(rc))
1366#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1367 {
1368 /*
1369 * Add the chunk.
1370 *
1371 * This must be done before the unwind init so windows can allocate
1372 * memory from the chunk when using the alternative sub-allocator.
1373 */
1374 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1375#ifdef IN_RING3
1376 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1377#endif
1378#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1379 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1380#else
1381 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1382 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1383 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1384 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1385#endif
1386
1387 pExecMemAllocator->cChunks = idxChunk + 1;
1388 pExecMemAllocator->idxChunkHint = idxChunk;
1389
1390#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1391 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1392 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1393#else
1394 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1395 pExecMemAllocator->cbTotal += cbFree;
1396 pExecMemAllocator->cbFree += cbFree;
1397#endif
1398
1399#ifdef IN_RING3
1400 /*
1401 * Initialize the unwind information (this cannot really fail atm).
1402 * (This sets pvUnwindInfo.)
1403 */
1404 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1405 if (RT_SUCCESS(rc))
1406#endif
1407 {
1408 return VINF_SUCCESS;
1409 }
1410
1411#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1412 /* Just in case the impossible happens, undo the above: */
1413 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1414 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1415 pExecMemAllocator->cChunks = idxChunk;
1416 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1417 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1418 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1419 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1420#endif
1421 }
1422#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1423 }
1424#endif
1425 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1426 RT_NOREF(pVCpu);
1427 return rc;
1428}
1429
1430
1431/**
1432 * Initializes the executable memory allocator for native recompilation on the
1433 * calling EMT.
1434 *
1435 * @returns VBox status code.
1436 * @param pVCpu The cross context virtual CPU structure of the calling
1437 * thread.
1438 * @param cbMax The max size of the allocator.
1439 * @param cbInitial The initial allocator size.
1440 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1441 * dependent).
1442 */
1443int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1444{
1445 /*
1446 * Validate input.
1447 */
1448 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1449 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1450 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1451 || cbChunk == 0
1452 || ( RT_IS_POWER_OF_TWO(cbChunk)
1453 && cbChunk >= _1M
1454 && cbChunk <= _256M
1455 && cbChunk <= cbMax),
1456 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1457 VERR_OUT_OF_RANGE);
1458
1459 /*
1460 * Adjust/figure out the chunk size.
1461 */
1462 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1463 {
1464 if (cbMax >= _256M)
1465 cbChunk = _64M;
1466 else
1467 {
1468 if (cbMax < _16M)
1469 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1470 else
1471 cbChunk = (uint32_t)cbMax / 4;
1472 if (!RT_IS_POWER_OF_TWO(cbChunk))
1473 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1474 }
1475 }
1476
1477 if (cbChunk > cbMax)
1478 cbMax = cbChunk;
1479 else
1480 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1481 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1482 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
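    /* Worked example (illustrative, not part of the original source): cbMax = 100 MiB
     * gives cbChunk = 100M / 4 = 25 MiB, which is rounded up to the 32 MiB power of two;
     * cbMax is then rounded up to 128 MiB, so cMaxChunks ends up as 4. */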
1483
1484 /*
1485 * Allocate and initialize the allocator instance.
1486 */
1487 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1488#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1489 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1490 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1491 cbNeeded += cbBitmap * cMaxChunks;
1492 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1493 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1494#endif
1495#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1496 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1497 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1498#endif
1499 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1500 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1501 VERR_NO_MEMORY);
1502 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1503 pExecMemAllocator->cbChunk = cbChunk;
1504 pExecMemAllocator->cMaxChunks = cMaxChunks;
1505 pExecMemAllocator->cChunks = 0;
1506 pExecMemAllocator->idxChunkHint = 0;
1507 pExecMemAllocator->cAllocations = 0;
1508 pExecMemAllocator->cbTotal = 0;
1509 pExecMemAllocator->cbFree = 0;
1510 pExecMemAllocator->cbAllocated = 0;
1511#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1512 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1513 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1514 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1515 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1516#endif
1517#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1518 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1519#endif
1520 for (uint32_t i = 0; i < cMaxChunks; i++)
1521 {
1522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1523 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1524 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1525#else
1526 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1527#endif
1528 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1529#ifdef IN_RING0
1530 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1531#else
1532 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1533#endif
1534 }
1535 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1536
1537 /*
1538 * Do the initial allocations.
1539 */
1540 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1541 {
1542 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1543 AssertLogRelRCReturn(rc, rc);
1544 }
1545
1546 pExecMemAllocator->idxChunkHint = 0;
1547
1548 return VINF_SUCCESS;
1549}
1550
1551
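/*
 * A rough usage sketch for iemExecMemAllocatorInit, kept out of the build with
 * #if 0.  The wrapper function below is hypothetical; the numbers in the
 * comments follow from the chunk size adjustment logic above.
 */
#if 0
static int iemExecMemAllocatorInitExample(PVMCPU pVCpu)
{
    /* cbMax=_256M with cbChunk=0 (use the default): since cbMax >= _256M the
       chunk size becomes _64M, giving cMaxChunks = 256M / 64M = 4.  With
       cbInitial=_4M the initial grow loop allocates one 64M chunk up front. */
    return iemExecMemAllocatorInit(pVCpu, _256M, _4M, 0 /*cbChunk: use default*/);
}
#endif

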
1552/*********************************************************************************************************************************
1553* Native Recompilation *
1554*********************************************************************************************************************************/
1555
1556
1557/**
1558 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1559 */
1560IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1561{
1562 pVCpu->iem.s.cInstructions += idxInstr;
1563 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1564}
1565
1566
1567/**
1568 * Used by TB code when it wants to raise a \#GP(0).
1569 */
1570IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1571{
1572 pVCpu->iem.s.cInstructions += idxInstr;
1573 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1574#ifndef _MSC_VER
1575 return VINF_IEM_RAISED_XCPT; /* not reached */
1576#endif
1577}
1578
1579
1580/**
1581 * Reinitializes the native recompiler state.
1582 *
1583 * Called before starting a new recompile job.
1584 */
1585static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1586{
1587 pReNative->cLabels = 0;
1588 pReNative->bmLabelTypes = 0;
1589 pReNative->cFixups = 0;
1590#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1591 pReNative->pDbgInfo->cEntries = 0;
1592#endif
1593 pReNative->pTbOrg = pTb;
1594 pReNative->cCondDepth = 0;
1595 pReNative->uCondSeqNo = 0;
1596 pReNative->uCheckIrqSeqNo = 0;
1597
1598 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1599#if IEMNATIVE_HST_GREG_COUNT < 32
1600 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1601#endif
1602 ;
1603 pReNative->Core.bmHstRegsWithGstShadow = 0;
1604 pReNative->Core.bmGstRegShadows = 0;
1605 pReNative->Core.bmVars = 0;
1606 pReNative->Core.bmStack = 0;
1607 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1608 pReNative->Core.u64ArgVars = UINT64_MAX;
1609
1610 /* Full host register reinit: */
1611 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1612 {
1613 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1614 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1615 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1616 }
1617
1618 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1619 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1620#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1621 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1622#endif
1623#ifdef IEMNATIVE_REG_FIXED_TMP0
1624 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1625#endif
1626 );
1627 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1628 {
1629 fRegs &= ~RT_BIT_32(idxReg);
1630 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1631 }
1632
1633 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1634#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1635 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1636#endif
1637#ifdef IEMNATIVE_REG_FIXED_TMP0
1638 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1639#endif
1640 return pReNative;
1641}
1642
1643
1644/**
1645 * Allocates and initializes the native recompiler state.
1646 *
1647 * This is called the first time an EMT wants to recompile something.
1648 *
1649 * @returns Pointer to the new recompiler state.
1650 * @param pVCpu The cross context virtual CPU structure of the calling
1651 * thread.
1652 * @param pTb The TB that's about to be recompiled.
1653 * @thread EMT(pVCpu)
1654 */
1655static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1656{
1657 VMCPU_ASSERT_EMT(pVCpu);
1658
1659 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1660 AssertReturn(pReNative, NULL);
1661
1662 /*
1663 * Try allocate all the buffers and stuff we need.
1664 */
1665 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1666 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1667 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1668#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1669 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1670#endif
1671 if (RT_LIKELY( pReNative->pInstrBuf
1672 && pReNative->paLabels
1673 && pReNative->paFixups)
1674#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1675 && pReNative->pDbgInfo
1676#endif
1677 )
1678 {
1679 /*
1680 * Set the buffer & array sizes on success.
1681 */
1682 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1683 pReNative->cLabelsAlloc = _8K;
1684 pReNative->cFixupsAlloc = _16K;
1685#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1686 pReNative->cDbgInfoAlloc = _16K;
1687#endif
1688
1689 /*
1690 * Done, just need to save it and reinit it.
1691 */
1692 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1693 return iemNativeReInit(pReNative, pTb);
1694 }
1695
1696 /*
1697 * Failed. Cleanup and return.
1698 */
1699 AssertFailed();
1700 RTMemFree(pReNative->pInstrBuf);
1701 RTMemFree(pReNative->paLabels);
1702 RTMemFree(pReNative->paFixups);
1703#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1704 RTMemFree(pReNative->pDbgInfo);
1705#endif
1706 RTMemFree(pReNative);
1707 return NULL;
1708}
1709
1710
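/*
 * A rough sketch of the per-EMT lifecycle implied by iemNativeInit and
 * iemNativeReInit, kept out of the build with #if 0: the state is allocated
 * lazily for the first translation block and merely reinitialized for every
 * subsequent one.  The wrapper function is hypothetical.
 */
#if 0
static PIEMRECOMPILERSTATE iemNativeExampleGetState(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (RT_LIKELY(pReNative))
        return iemNativeReInit(pReNative, pTb); /* reuse the buffers, reset labels, fixups and registers */
    return iemNativeInit(pVCpu, pTb);           /* first TB on this EMT: allocate buffers and state */
}
#endif

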
1711/**
1712 * Creates a label
1713 *
1714 * If the label does not yet have a defined position,
1715 * call iemNativeLabelDefine() later to set it.
1716 *
1717 * @returns Label ID. Throws VBox status code on failure, so no need to check
1718 * the return value.
1719 * @param pReNative The native recompile state.
1720 * @param enmType The label type.
1721 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1722 * label is not yet defined (default).
1723 * @param uData Data associated with the label. Only applicable to
1724 * certain types of labels. Default is zero.
1725 */
1726DECL_HIDDEN_THROW(uint32_t)
1727iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1728 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1729{
1730 /*
1731 * Locate existing label definition.
1732 *
1733 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1734 * and uData is zero.
1735 */
1736 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1737 uint32_t const cLabels = pReNative->cLabels;
1738 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1739#ifndef VBOX_STRICT
1740 && offWhere == UINT32_MAX
1741 && uData == 0
1742#endif
1743 )
1744 {
1745 /** @todo Since this is only used for labels with uData = 0, just use a
1746 * lookup array? */
1747 for (uint32_t i = 0; i < cLabels; i++)
1748 if ( paLabels[i].enmType == enmType
1749 && paLabels[i].uData == uData)
1750 {
1751#ifdef VBOX_STRICT
1752 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1753 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1754#endif
1755 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1756 return i;
1757 }
1758 }
1759
1760 /*
1761 * Make sure we've got room for another label.
1762 */
1763 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1764 { /* likely */ }
1765 else
1766 {
1767 uint32_t cNew = pReNative->cLabelsAlloc;
1768 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1769 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1770 cNew *= 2;
1771 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1772 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1773 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1774 pReNative->paLabels = paLabels;
1775 pReNative->cLabelsAlloc = cNew;
1776 }
1777
1778 /*
1779 * Define a new label.
1780 */
1781 paLabels[cLabels].off = offWhere;
1782 paLabels[cLabels].enmType = enmType;
1783 paLabels[cLabels].uData = uData;
1784 pReNative->cLabels = cLabels + 1;
1785
1786 Assert(enmType >= 0 && enmType < 64);
1787 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1788
1789 if (offWhere != UINT32_MAX)
1790 {
1791#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1792 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1793 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1794#endif
1795 }
1796 return cLabels;
1797}
1798
1799
1800/**
1801 * Defines the location of an existing label.
1802 *
1803 * @param pReNative The native recompile state.
1804 * @param idxLabel The label to define.
1805 * @param offWhere The position.
1806 */
1807DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1808{
1809 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1810 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1811 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1812 pLabel->off = offWhere;
1813#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1814 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1815 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1816#endif
1817}
1818
1819
1820/**
1821 * Looks up a label.
1822 *
1823 * @returns Label ID if found, UINT32_MAX if not.
1824 */
1825static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1826 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1827{
1828 Assert(enmType >= 0 && enmType < 64);
1829 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1830 {
1831 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1832 uint32_t const cLabels = pReNative->cLabels;
1833 for (uint32_t i = 0; i < cLabels; i++)
1834 if ( paLabels[i].enmType == enmType
1835 && paLabels[i].uData == uData
1836 && ( paLabels[i].off == offWhere
1837 || offWhere == UINT32_MAX
1838 || paLabels[i].off == UINT32_MAX))
1839 return i;
1840 }
1841 return UINT32_MAX;
1842}
1843
1844
1845/**
1846 * Adds a fixup.
1847 *
1848 * @throws VBox status code (int) on failure.
1849 * @param pReNative The native recompile state.
1850 * @param offWhere The instruction offset of the fixup location.
1851 * @param idxLabel The target label ID for the fixup.
1852 * @param enmType The fixup type.
1853 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1854 */
1855DECL_HIDDEN_THROW(void)
1856iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1857 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1858{
1859 Assert(idxLabel <= UINT16_MAX);
1860 Assert((unsigned)enmType <= UINT8_MAX);
1861
1862 /*
1863 * Make sure we've room.
1864 */
1865 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1866 uint32_t const cFixups = pReNative->cFixups;
1867 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1868 { /* likely */ }
1869 else
1870 {
1871 uint32_t cNew = pReNative->cFixupsAlloc;
1872 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1873 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1874 cNew *= 2;
1875 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1876 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1877 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1878 pReNative->paFixups = paFixups;
1879 pReNative->cFixupsAlloc = cNew;
1880 }
1881
1882 /*
1883 * Add the fixup.
1884 */
1885 paFixups[cFixups].off = offWhere;
1886 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1887 paFixups[cFixups].enmType = enmType;
1888 paFixups[cFixups].offAddend = offAddend;
1889 pReNative->cFixups = cFixups + 1;
1890}
1891
1892
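/*
 * A rough sketch of the forward-branch pattern the label and fixup helpers
 * above are designed for, kept out of the build with #if 0.  The wrapper
 * function, its enum parameters and the "emit" placeholders are hypothetical;
 * only the three helper calls are real.
 */
#if 0
static uint32_t iemNativeExampleForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                              IEMNATIVELABELTYPE enmLabelType, IEMNATIVEFIXUPTYPE enmFixupType)
{
    /* 1. Create the label without a position (offWhere=UINT32_MAX, uData=0). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0);

    /* 2. Record a fixup at the branch location so the final assembly pass can
          patch in the real displacement once the label is defined. */
    iemNativeAddFixup(pReNative, off, idxLabel, enmFixupType, 0 /*offAddend*/);
    /* ... emit the (placeholder) branch and the code being jumped over ... */

    /* 3. Define the label once the target position is known. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif

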
1893/**
1894 * Slow code path for iemNativeInstrBufEnsure.
1895 */
1896DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1897{
1898 /* Double the buffer size till we meet the request. */
1899 uint32_t cNew = pReNative->cInstrBufAlloc;
1900 AssertReturn(cNew > 0, NULL);
1901 do
1902 cNew *= 2;
1903 while (cNew < off + cInstrReq);
1904
1905 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1906#ifdef RT_ARCH_ARM64
1907 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1908#else
1909 uint32_t const cbMaxInstrBuf = _2M;
1910#endif
1911 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1912
1913 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1914 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1915
1916 pReNative->cInstrBufAlloc = cNew;
1917 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1918}
1919
1920#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1921
1922/**
1923 * Grows the static debug info array used during recompilation.
1924 *
1925 * @returns Pointer to the new debug info block; throws VBox status code on
1926 * failure, so no need to check the return value.
1927 */
1928DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1929{
1930 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1931 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1932 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1933 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1934 pReNative->pDbgInfo = pDbgInfo;
1935 pReNative->cDbgInfoAlloc = cNew;
1936 return pDbgInfo;
1937}
1938
1939
1940/**
1941 * Adds a new, uninitialized debug info entry, returning a pointer to it.
1942 */
1943DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1944{
1945 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1946 { /* likely */ }
1947 else
1948 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1949 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1950}
1951
1952
1953/**
1954 * Debug Info: Adds a native offset record, if necessary.
1955 */
1956static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1957{
1958 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1959
1960 /*
1961 * Search backwards to see if we've got a similar record already.
1962 */
1963 uint32_t idx = pDbgInfo->cEntries;
1964 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1965 while (idx-- > idxStop)
1966 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1967 {
1968 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1969 return;
1970 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1971 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1972 break;
1973 }
1974
1975 /*
1976 * Add it.
1977 */
1978 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1979 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1980 pEntry->NativeOffset.offNative = off;
1981}
1982
1983
1984/**
1985 * Debug Info: Record info about a label.
1986 */
1987static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
1988{
1989 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1990 pEntry->Label.uType = kIemTbDbgEntryType_Label;
1991 pEntry->Label.uUnused = 0;
1992 pEntry->Label.enmLabel = (uint8_t)enmType;
1993 pEntry->Label.uData = uData;
1994}
1995
1996
1997/**
1998 * Debug Info: Record info about a threaded call.
1999 */
2000static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2001{
2002 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2003 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2004 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2005 pEntry->ThreadedCall.uUnused = 0;
2006 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2007}
2008
2009
2010/**
2011 * Debug Info: Record info about a new guest instruction.
2012 */
2013static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2014{
2015 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2016 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2017 pEntry->GuestInstruction.uUnused = 0;
2018 pEntry->GuestInstruction.fExec = fExec;
2019}
2020
2021
2022/**
2023 * Debug Info: Record info about guest register shadowing.
2024 */
2025static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2026 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2027{
2028 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2029 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2030 pEntry->GuestRegShadowing.uUnused = 0;
2031 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2032 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2033 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2034}
2035
2036#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2037
2038
2039/*********************************************************************************************************************************
2040* Register Allocator *
2041*********************************************************************************************************************************/
2042
2043/**
2044 * Register parameter indexes (indexed by argument number).
2045 */
2046DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2047{
2048 IEMNATIVE_CALL_ARG0_GREG,
2049 IEMNATIVE_CALL_ARG1_GREG,
2050 IEMNATIVE_CALL_ARG2_GREG,
2051 IEMNATIVE_CALL_ARG3_GREG,
2052#if defined(IEMNATIVE_CALL_ARG4_GREG)
2053 IEMNATIVE_CALL_ARG4_GREG,
2054# if defined(IEMNATIVE_CALL_ARG5_GREG)
2055 IEMNATIVE_CALL_ARG5_GREG,
2056# if defined(IEMNATIVE_CALL_ARG6_GREG)
2057 IEMNATIVE_CALL_ARG6_GREG,
2058# if defined(IEMNATIVE_CALL_ARG7_GREG)
2059 IEMNATIVE_CALL_ARG7_GREG,
2060# endif
2061# endif
2062# endif
2063#endif
2064};
2065
2066/**
2067 * Call register masks indexed by argument count.
2068 */
2069DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2070{
2071 0,
2072 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2073 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2074 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2075 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2076 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2077#if defined(IEMNATIVE_CALL_ARG4_GREG)
2078 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2079 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2080# if defined(IEMNATIVE_CALL_ARG5_GREG)
2081 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2082 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2083# if defined(IEMNATIVE_CALL_ARG6_GREG)
2084 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2085 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2086 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2087# if defined(IEMNATIVE_CALL_ARG7_GREG)
2088 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2089 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2090 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2091# endif
2092# endif
2093# endif
2094#endif
2095};
2096
2097#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2098/**
2099 * BP offset of the stack argument slots.
2100 *
2101 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2102 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2103 */
2104DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2105{
2106 IEMNATIVE_FP_OFF_STACK_ARG0,
2107# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2108 IEMNATIVE_FP_OFF_STACK_ARG1,
2109# endif
2110# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2111 IEMNATIVE_FP_OFF_STACK_ARG2,
2112# endif
2113# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2114 IEMNATIVE_FP_OFF_STACK_ARG3,
2115# endif
2116};
2117AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2118#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2119
2120/**
2121 * Info about shadowed guest register values.
2122 * @see IEMNATIVEGSTREG
2123 */
2124static struct
2125{
2126 /** Offset in VMCPU. */
2127 uint32_t off;
2128 /** The field size. */
2129 uint8_t cb;
2130 /** Name (for logging). */
2131 const char *pszName;
2132} const g_aGstShadowInfo[] =
2133{
2134#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2135 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2136 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2137 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2138 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2139 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2140 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2141 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2142 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2143 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2144 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2145 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2146 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2147 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2148 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2151 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2152 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2153 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
2154 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
2155 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
2156 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
2157 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
2158 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
2159 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2160 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2161 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2162 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2163 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2164 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2165 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2166 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2167 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2168 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2169 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2170 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2171 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2172 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2173 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2174 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2175 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2176 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2177#undef CPUMCTX_OFF_AND_SIZE
2178};
2179AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2180
2181
2182/** Host CPU general purpose register names. */
2183const char * const g_apszIemNativeHstRegNames[] =
2184{
2185#ifdef RT_ARCH_AMD64
2186 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2187#elif defined(RT_ARCH_ARM64)
2188 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2189 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2190#else
2191# error "port me"
2192#endif
2193};
2194
2195
2196DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2197 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2198{
2199 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2200
2201 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2202 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2203 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2204 return (uint8_t)idxReg;
2205}
2206
2207
2208/**
2209 * Tries to locate a suitable register in the given register mask.
2210 *
2211 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2212 * failed.
2213 *
2214 * @returns Host register number on success, returns UINT8_MAX on failure.
2215 */
2216static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2217{
2218 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2219 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2220 if (fRegs)
2221 {
2222 /** @todo pick better here: */
2223 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2224
2225 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2226 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2227 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2228 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2229
2230 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2231 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2232 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2233 return idxReg;
2234 }
2235 return UINT8_MAX;
2236}
2237
2238
2239/**
2240 * Locate a register, possibly freeing one up.
2241 *
2242 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2243 * failed.
2244 *
2245 * @returns Host register number on success. Returns UINT8_MAX if no registers
2246 * found, the caller is supposed to deal with this and raise an
2247 * allocation type specific status code (if desired).
2248 *
2249 * @throws VBox status code if we run into trouble spilling a variable or
2250 * recording debug info. Does NOT throw anything if we're out of
2251 * registers, though.
2252 */
2253static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2254 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2255{
2256 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2257 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2258
2259 /*
2260 * Try a freed register that's shadowing a guest register
2261 */
2262 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2263 if (fRegs)
2264 {
2265 unsigned const idxReg = (fPreferVolatile
2266 ? ASMBitFirstSetU32(fRegs)
2267 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2268 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2269 - 1;
2270
2271 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2272 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2273 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2274 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2275
2276 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2277 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2278 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2279 return idxReg;
2280 }
2281
2282 /*
2283 * Try free up a variable that's in a register.
2284 *
2285 * We do two rounds here, first evacuating variables we don't need to be
2286 * saved on the stack, then in the second round move things to the stack.
2287 */
2288 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2289 {
2290 uint32_t fVars = pReNative->Core.bmVars;
2291 while (fVars)
2292 {
2293 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2294 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2295 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2296 && (RT_BIT_32(idxReg) & fRegMask)
2297 && ( iLoop == 0
2298 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2299 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2300 {
2301 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2302 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2303 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2304 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2305 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2306
2307 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2308 {
2309 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
2310 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2311 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff,
2312 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2313 - IEMNATIVE_FP_OFF_STACK_VARS,
2314 idxReg);
2315 }
2316
2317 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2318 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2319 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2320 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2321 return idxReg;
2322 }
2323 fVars &= ~RT_BIT_32(idxVar);
2324 }
2325 }
2326
2327 return UINT8_MAX;
2328}
2329
2330
2331/**
2332 * Moves a variable to a different register or spills it onto the stack.
2333 *
2334 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2335 * kinds can easily be recreated if needed later.
2336 *
2337 * @returns The new code buffer position, UINT32_MAX on failure.
2338 * @param pReNative The native recompile state.
2339 * @param off The current code buffer position.
2340 * @param idxVar The variable index.
2341 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2342 * call-volatile registers.
2343 */
2344static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2345 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2346{
2347 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2348 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2349
2350 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2351 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2352 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2353 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2354 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2355 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2356 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2357 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2358
2359
2360 /** @todo Add statistics on this.*/
2361 /** @todo Implement basic variable liveness analysis (python) so variables
2362 * can be freed immediately once no longer used. As it stands, we risk
2363 * trashing registers and stack slots for dead variables. */
2364
2365 /*
2366 * First try move it to a different register, as that's cheaper.
2367 */
2368 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2369 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2370 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2371 if (fRegs)
2372 {
2373 /* Avoid using shadow registers, if possible. */
2374 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2375 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2376 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2377
2378 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2379 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2380 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2381 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2382 if (fGstRegShadows)
2383 {
2384 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2385 while (fGstRegShadows)
2386 {
2387 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2388 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2389
2390 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2391 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2392 }
2393 }
2394
2395 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2396 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2397 }
2398 /*
2399 * Otherwise we must spill the register onto the stack.
2400 */
2401 else
2402 {
2403 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2404 off = iemNativeEmitStoreGprByBp(pReNative, off,
2405 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2406 - IEMNATIVE_FP_OFF_STACK_VARS,
2407 idxRegOld);
2408
2409 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2410 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2411 }
2412
2413 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2414 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2415 return off;
2416}
2417
2418
2419/**
2420 * Allocates a temporary host general purpose register.
2421 *
2422 * This may emit code to save register content onto the stack in order to free
2423 * up a register.
2424 *
2425 * @returns The host register number; throws VBox status code on failure,
2426 * so no need to check the return value.
2427 * @param pReNative The native recompile state.
2428 * @param poff Pointer to the variable with the code buffer position.
2429 * This will be updated if we need to move a variable from
2430 * register to stack in order to satisfy the request.
2431 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2432 * registers (@c true, default) or the other way around
2433 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2434 */
2435DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2436{
2437 /*
2438 * Try find a completely unused register, preferably a call-volatile one.
2439 */
2440 uint8_t idxReg;
2441 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2442 & ~pReNative->Core.bmHstRegsWithGstShadow
2443 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2444 if (fRegs)
2445 {
2446 if (fPreferVolatile)
2447 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2448 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2449 else
2450 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2451 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2452 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2453 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2454 }
2455 else
2456 {
2457 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2458 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2459 }
2460 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2461}
2462
2463
2464/**
2465 * Allocates a temporary register for loading an immediate value into.
2466 *
2467 * This will emit code to load the immediate, unless there happens to be an
2468 * unused register with the value already loaded.
2469 *
2470 * The caller will not modify the returned register, it must be considered
2471 * read-only. Free using iemNativeRegFreeTmpImm.
2472 *
2473 * @returns The host register number; throws VBox status code on failure, so no
2474 * need to check the return value.
2475 * @param pReNative The native recompile state.
2476 * @param poff Pointer to the variable with the code buffer position.
2477 * @param uImm The immediate value that the register must hold upon
2478 * return.
2479 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2480 * registers (@c true, default) or the other way around
2481 * (@c false).
2482 *
2483 * @note Reusing immediate values has not been implemented yet.
2484 */
2485DECL_HIDDEN_THROW(uint8_t)
2486iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2487{
2488 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2489 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2490 return idxReg;
2491}
2492
2493
2494/**
2495 * Marks host register @a idxHstReg as containing a shadow copy of guest
2496 * register @a enmGstReg.
2497 *
2498 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2499 * host register before calling.
2500 */
2501DECL_FORCE_INLINE(void)
2502iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2503{
2504 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2505
2506 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2507 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2508 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2509 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2510#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2511 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2512 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2513#else
2514 RT_NOREF(off);
2515#endif
2516}
2517
2518
2519/**
2520 * Clear any guest register shadow claims from @a idxHstReg.
2521 *
2522 * The register does not need to be shadowing any guest registers.
2523 */
2524DECL_FORCE_INLINE(void)
2525iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2526{
2527 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2528 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2529 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2530 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2531
2532#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2533 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2534 if (fGstRegs)
2535 {
2536 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2537 while (fGstRegs)
2538 {
2539 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2540 fGstRegs &= ~RT_BIT_64(iGstReg);
2541 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2542 }
2543 }
2544#else
2545 RT_NOREF(off);
2546#endif
2547
2548 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2549 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2550 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2551}
2552
2553
2554/**
2555 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2556 * to @a idxRegTo.
2557 */
2558DECL_FORCE_INLINE(void)
2559iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2560 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2561{
2562 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2563 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2564 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2565 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2566 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2567
2568 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2569 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2570 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2571#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2572 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2573 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2574#else
2575 RT_NOREF(off);
2576#endif
2577}
2578
2579
2580/**
2581 * Allocates a temporary host general purpose register for keeping a guest
2582 * register value.
2583 *
2584 * Since we may already have a register holding the guest register value,
2585 * code will be emitted to do the loading if that's not the case. Code may also
2586 * be emitted if we have to free up a register to satisfy the request.
2587 *
2588 * @returns The host register number; throws VBox status code on failure, so no
2589 * need to check the return value.
2590 * @param pReNative The native recompile state.
2591 * @param poff Pointer to the variable with the code buffer
2592 * position. This will be updated if we need to move a
2593 * variable from register to stack in order to satisfy
2594 * the request.
2595 * @param enmGstReg The guest register that is to be updated.
2596 * @param enmIntendedUse How the caller will be using the host register.
2597 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2598 */
2599DECL_HIDDEN_THROW(uint8_t)
2600iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2601 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2602{
2603 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2604#ifdef LOG_ENABLED
2605 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2606#endif
2607
2608 /*
2609 * First check if the guest register value is already in a host register.
2610 */
2611 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2612 {
2613 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2614 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2615 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2616 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2617
2618 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2619 {
2620 /*
2621 * If the register will trash the guest shadow copy, try find a
2622 * completely unused register we can use instead. If that fails,
2623 * we need to disassociate the host reg from the guest reg.
2624 */
2625 /** @todo would be nice to know if preserving the register is in any way helpful. */
2626 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2627 && ( ~pReNative->Core.bmHstRegs
2628 & ~pReNative->Core.bmHstRegsWithGstShadow
2629 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2630 {
2631 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2632
2633 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2634
2635 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2636 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2637 g_apszIemNativeHstRegNames[idxRegNew]));
2638 idxReg = idxRegNew;
2639 }
2640 else
2641 {
2642 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2643 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2644 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2645 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2646 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2647 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2648 else
2649 {
2650 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2651 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2652 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2653 }
2654 }
2655 }
2656 else
2657 {
2658 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2659 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2660
2661 /*
2662 * Allocate a new register, copy the value and, if updating, the
2663 * guest shadow copy assignment to the new register.
2664 */
2665 /** @todo share register for readonly access. */
2666 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2667
2668 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2669
2670 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2671 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2672 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2673 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2674 else
2675 {
2676 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2677 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2678 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2679 g_apszIemNativeHstRegNames[idxRegNew]));
2680 }
2681 idxReg = idxRegNew;
2682 }
2683
2684#ifdef VBOX_STRICT
2685 /* Strict builds: Check that the value is correct. */
2686 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2687#endif
2688
2689 return idxReg;
2690 }
2691
2692 /*
2693 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
2694 */
2695 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2696
2697 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2698
2699 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2700 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2701 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2702 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2703
2704 return idxRegNew;
2705}
2706
2707
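/*
 * A rough sketch of a typical iemNativeRegAllocTmpForGuestReg use, kept out of
 * the build with #if 0.  The wrapper and the "emit code" placeholder are
 * hypothetical; the enum values are the ones referenced by the code above.
 */
#if 0
static uint32_t iemNativeExampleUpdateRax(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Get a host register shadowing guest RAX; for kIemNativeGstRegUse_ForUpdate
       the returned register keeps (or receives) the guest shadow association. */
    uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                              (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                              kIemNativeGstRegUse_ForUpdate);
    /* ... emit code that modifies idxRegRax ... */
    iemNativeRegFreeTmp(pReNative, idxRegRax); /* defined further down; does not flush the shadow copy */
    return off;
}
#endif

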
2708/**
2709 * Allocates a temporary host general purpose register that already holds the
2710 * given guest register value.
2711 *
2712 * The use case for this function is places where the shadowing state cannot be
2713 * modified due to branching and such. This will fail if we don't have a
2714 * current shadow copy handy or if it's incompatible. The only code that will
2715 * be emitted here is value checking code in strict builds.
2716 *
2717 * The intended use can only be readonly!
2718 *
2719 * @returns The host register number, UINT8_MAX if not present.
2720 * @param pReNative The native recompile state.
2721 * @param poff Pointer to the instruction buffer offset.
2722 * Will be updated in strict builds if a register is
2723 * found.
2724 * @param enmGstReg The guest register that is to be used (read-only).
2725 * @note In strict builds, this may throw instruction buffer growth failures.
2726 * Non-strict builds will not throw anything.
2727 * @sa iemNativeRegAllocTmpForGuestReg
2728 */
2729DECL_HIDDEN_THROW(uint8_t)
2730iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2731{
2732 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2733
2734 /*
2735 * First check if the guest register value is already in a host register.
2736 */
2737 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2738 {
2739 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2740 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2741 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2742 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2743
2744 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2745 {
2746 /*
2747 * We only do readonly use here, so easy compared to the other
2748 * variant of this code.
2749 */
2750 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2751 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2752 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2753 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2754 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2755
2756#ifdef VBOX_STRICT
2757 /* Strict builds: Check that the value is correct. */
2758 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2759#else
2760 RT_NOREF(poff);
2761#endif
2762 return idxReg;
2763 }
2764 }
2765
2766 return UINT8_MAX;
2767}
2768
2769
2770DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2771
2772
2773/**
2774 * Allocates argument registers for a function call.
2775 *
2776 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2777 * need to check the return value.
2778 * @param pReNative The native recompile state.
2779 * @param off The current code buffer offset.
2780 * @param cArgs The number of arguments the function call takes.
2781 */
2782DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2783{
2784 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2785 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2786 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2787 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2788
2789 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2790 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2791 else if (cArgs == 0)
2792 return off;
2793
2794 /*
2795 * Are we in luck and all the registers are free and not shadowing anything?
2796 */
2797 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2798 for (uint32_t i = 0; i < cArgs; i++)
2799 {
2800 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2801 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2802 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2803 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2804 }
2805 /*
2806 * Okay, not lucky so we have to free up the registers.
2807 */
2808 else
2809 for (uint32_t i = 0; i < cArgs; i++)
2810 {
2811 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2812 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2813 {
2814 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2815 {
2816 case kIemNativeWhat_Var:
2817 {
2818 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2819 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2820 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2821 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2822 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2823
2824 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2825 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2826 else
2827 {
2828 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2829 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2830 }
2831 break;
2832 }
2833
2834 case kIemNativeWhat_Tmp:
2835 case kIemNativeWhat_Arg:
2836 case kIemNativeWhat_rc:
2837 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2838 default:
2839 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2840 }
2841
2842 }
2843 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2844 {
2845 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2846 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2847 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2848 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2849 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2850 }
2851 else
2852 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2853 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2854 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2855 }
2856 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2857 return off;
2858}
2859
2860
2861DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2862
2863
2864#if 0
2865/**
2866 * Frees a register assignment of any type.
2867 *
2868 * @param pReNative The native recompile state.
2869 * @param idxHstReg The register to free.
2870 *
2871 * @note Does not update variables.
2872 */
2873DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2874{
2875 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2876 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2877 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2878 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2879 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2880 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2881 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2882 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2883 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2884 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2885 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2886 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2887 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2888 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2889
2890 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2891 /* no flushing, right:
2892 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2893 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2894 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2895 */
2896}
2897#endif
2898
2899
2900/**
2901 * Frees a temporary register.
2902 *
2903 * Any shadow copies of guest registers assigned to the host register will not
2904 * be flushed by this operation.
2905 */
2906DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2907{
2908 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2909 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2910 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2911 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2912 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2913}
2914
2915
2916/**
2917 * Frees a temporary immediate register.
2918 *
2919 * It is assumed that the call has not modified the register, so it still holds
2920 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2921 */
2922DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2923{
2924 iemNativeRegFreeTmp(pReNative, idxHstReg);
2925}
2926
2927
2928/**
2929 * Called right before emitting a call instruction to move anything important
2930 * out of call-volatile registers, free and flush the call-volatile registers,
2931 * optionally freeing argument variables.
2932 *
2933 * @returns New code buffer offset, UINT32_MAX on failure.
2934 * @param pReNative The native recompile state.
2935 * @param off The code buffer offset.
2936 * @param cArgs The number of arguments the function call takes.
2937 * It is presumed that the host register part of these has
2938 * already been allocated as such and won't need moving,
2939 * just freeing.
2940 */
2941DECL_HIDDEN_THROW(uint32_t)
2942iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2943{
2944 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
2945
2946 /*
2947 * Move anything important out of volatile registers.
2948 */
2949 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2950 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2951 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2952#ifdef IEMNATIVE_REG_FIXED_TMP0
2953 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2954#endif
2955 & ~g_afIemNativeCallRegs[cArgs];
2956
2957 fRegsToMove &= pReNative->Core.bmHstRegs;
2958 if (!fRegsToMove)
2959 { /* likely */ }
2960 else
2961 while (fRegsToMove != 0)
2962 {
2963 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2964 fRegsToMove &= ~RT_BIT_32(idxReg);
2965
2966 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2967 {
2968 case kIemNativeWhat_Var:
2969 {
2970 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2971 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2972 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2973 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2974 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2975 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2976 else
2977 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2978 continue;
2979 }
2980
2981 case kIemNativeWhat_Arg:
2982 AssertMsgFailed(("What?!?: %u\n", idxReg));
2983 continue;
2984
2985 case kIemNativeWhat_rc:
2986 case kIemNativeWhat_Tmp:
2987 AssertMsgFailed(("Missing free: %u\n", idxReg));
2988 continue;
2989
2990 case kIemNativeWhat_FixedTmp:
2991 case kIemNativeWhat_pVCpuFixed:
2992 case kIemNativeWhat_pCtxFixed:
2993 case kIemNativeWhat_FixedReserved:
2994 case kIemNativeWhat_Invalid:
2995 case kIemNativeWhat_End:
2996 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
2997 }
2998 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
2999 }
3000
3001 /*
3002 * Do the actual freeing.
3003 */
3004 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3005
3006 /* If there are guest register shadows in any call-volatile register, we
3007 have to clear the corresponding guest register masks for each register. */
3008 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3009 if (fHstRegsWithGstShadow)
3010 {
3011 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3012 do
3013 {
3014 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3015 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3016
3017 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3018 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3019 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3020 } while (fHstRegsWithGstShadow != 0);
3021 }
3022
3023 return off;
3024}
3025
3026
3027/**
3028 * Flushes a set of guest register shadow copies.
3029 *
3030 * This is usually done after calling a threaded function or a C-implementation
3031 * of an instruction.
3032 *
3033 * @param pReNative The native recompile state.
3034 * @param fGstRegs Set of guest registers to flush.
3035 */
3036DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3037{
3038 /*
3039 * Reduce the mask by what's currently shadowed
3040 */
3041 fGstRegs &= pReNative->Core.bmGstRegShadows;
3042 if (fGstRegs)
3043 {
3044 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3045 if (pReNative->Core.bmGstRegShadows)
3046 {
3047 /*
3048 * Partial.
3049 */
3050 do
3051 {
3052 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3053 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3054 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3055 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3056 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3057
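/* Collect every guest register shadowed by this host register that is part of
   the flush set (including idxGstReg itself), drop those bits from the pending
   mask and from the host register's shadow set; if that empties the set, the
   host register no longer carries any shadows. */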
3058 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3059 fGstRegs &= ~fInThisHstReg;
3060 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3061 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3062 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3063 } while (fGstRegs != 0);
3064 }
3065 else
3066 {
3067 /*
3068 * Clear all.
3069 */
3070 do
3071 {
3072 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3073 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3074 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3075 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3076 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3077
3078 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3079 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3080 } while (fGstRegs != 0);
3081 pReNative->Core.bmHstRegsWithGstShadow = 0;
3082 }
3083 }
3084}
3085
3086
3087/**
3088 * Flushes any delayed guest register writes.
3089 *
3090 * This must be called prior to calling CImpl functions and any helpers that use
3091 * the guest state (like raising exceptions) and such.
3092 *
3093 * This optimization has not yet been implemented. The first target would be
3094 * RIP updates, since these are the most common ones.
3095 */
3096DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3097{
3098 RT_NOREF(pReNative, off);
3099 return off;
3100}
3101
3102
3103/*********************************************************************************************************************************
3104* Code Emitters (larger snippets) *
3105*********************************************************************************************************************************/
3106
3107/**
3108 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3109 * extending to 64-bit width.
3110 *
3111 * @returns New code buffer offset on success, UINT32_MAX on failure.
3112 * @param pReNative The native recompile state.
3113 * @param off The current code buffer position.
3114 * @param idxHstReg The host register to load the guest register value into.
3115 * @param enmGstReg The guest register to load.
3116 *
3117 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3118 * that is something the caller needs to do if applicable.
3119 */
3120DECL_HIDDEN_THROW(uint32_t)
3121iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3122{
3123 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3124 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3125
3126 switch (g_aGstShadowInfo[enmGstReg].cb)
3127 {
3128 case sizeof(uint64_t):
3129 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3130 case sizeof(uint32_t):
3131 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3132 case sizeof(uint16_t):
3133 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3134#if 0 /* not present in the table. */
3135 case sizeof(uint8_t):
3136 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3137#endif
3138 default:
3139 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3140 }
3141}
3142
3143
3144#ifdef VBOX_STRICT
3145/**
3146 * Emits code that checks that the content of register @a idxReg is the same
3147 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
3148 * instruction if that's not the case.
3149 *
3150 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3151 * Trashes EFLAGS on AMD64.
3152 */
3153static uint32_t
3154iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3155{
3156# ifdef RT_ARCH_AMD64
3157 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3158
3159 /* cmp reg, [mem] */
3160 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3161 {
3162 if (idxReg >= 8)
3163 pbCodeBuf[off++] = X86_OP_REX_R;
3164 pbCodeBuf[off++] = 0x38;
3165 }
3166 else
3167 {
3168 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3169 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3170 else
3171 {
3172 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3173 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3174 else
3175 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3176 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3177 if (idxReg >= 8)
3178 pbCodeBuf[off++] = X86_OP_REX_R;
3179 }
3180 pbCodeBuf[off++] = 0x39;
3181 }
3182 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3183
3184 /* je/jz +1 */
3185 pbCodeBuf[off++] = 0x74;
3186 pbCodeBuf[off++] = 0x01;
3187
3188 /* int3 */
3189 pbCodeBuf[off++] = 0xcc;
3190
3191 /* For values smaller than the register size, we must check that the rest
3192 of the register is all zeros. */
3193 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3194 {
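/* Note: the imm32 constructed below is 0xffffff00 for byte-sized and
   0xffff0000 for word-sized guest fields; with REX.W it is sign-extended, so
   the TEST covers every bit above the value. */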
3195 /* test reg64, imm32 */
3196 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3197 pbCodeBuf[off++] = 0xf7;
3198 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3199 pbCodeBuf[off++] = 0;
3200 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3201 pbCodeBuf[off++] = 0xff;
3202 pbCodeBuf[off++] = 0xff;
3203
3204 /* je/jz +1 */
3205 pbCodeBuf[off++] = 0x74;
3206 pbCodeBuf[off++] = 0x01;
3207
3208 /* int3 */
3209 pbCodeBuf[off++] = 0xcc;
3210 }
3211 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3212 {
3213 /* rol reg64, 32 */
3214 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3215 pbCodeBuf[off++] = 0xc1;
3216 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3217 pbCodeBuf[off++] = 32;
3218
3219 /* test reg32, ffffffffh */
3220 if (idxReg >= 8)
3221 pbCodeBuf[off++] = X86_OP_REX_B;
3222 pbCodeBuf[off++] = 0xf7;
3223 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3224 pbCodeBuf[off++] = 0xff;
3225 pbCodeBuf[off++] = 0xff;
3226 pbCodeBuf[off++] = 0xff;
3227 pbCodeBuf[off++] = 0xff;
3228
3229 /* je/jz +1 */
3230 pbCodeBuf[off++] = 0x74;
3231 pbCodeBuf[off++] = 0x01;
3232
3233 /* int3 */
3234 pbCodeBuf[off++] = 0xcc;
3235
3236 /* rol reg64, 32 */
3237 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3238 pbCodeBuf[off++] = 0xc1;
3239 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3240 pbCodeBuf[off++] = 32;
3241 }
3242
3243# elif defined(RT_ARCH_ARM64)
3244 /* mov TMP0, [gstreg] */
3245 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3246
3247 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3248 /* sub tmp0, tmp0, idxReg */
3249 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3250 /* cbz tmp0, +1 */
3251 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, IEMNATIVE_REG_FIXED_TMP0);
3252 /* brk #0x1000+enmGstReg */
3253 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3254
3255# else
3256# error "Port me!"
3257# endif
3258 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3259 return off;
3260}
3261#endif /* VBOX_STRICT */
3262
3263
3264
3265/**
3266 * Emits code for checking the return code of a call and rcPassUp, returning
3267 * from the code if either is non-zero.
3268 */
3269DECL_HIDDEN_THROW(uint32_t)
3270iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3271{
3272#ifdef RT_ARCH_AMD64
3273 /*
3274 * AMD64: eax = call status code.
3275 */
3276
3277 /* edx = rcPassUp */
3278 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3279# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3280 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3281# endif
3282
3283 /* edx = eax | rcPassUp */
3284 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3285 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3288
3289 /* Jump to non-zero status return path. */
3290 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3291
3292 /* done. */
3293
3294#elif RT_ARCH_ARM64
3295 /*
3296 * ARM64: w0 = call status code.
3297 */
3298 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3299 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3300
3301 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3302
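/* w4 = w0 (call status) | w3 (rcPassUp); a non-zero result means we must take
   the NonZeroRetOrPassUp path below. */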
3303 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3304
3305 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3306 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3307 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
3308
3309#else
3310# error "port me"
3311#endif
3312 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3313 return off;
3314}
3315
3316
3317/**
3318 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3319 * raising a \#GP(0) if it isn't.
3320 *
3321 * @returns New code buffer offset, UINT32_MAX on failure.
3322 * @param pReNative The native recompile state.
3323 * @param off The code buffer offset.
3324 * @param idxAddrReg The host register with the address to check.
3325 * @param idxInstr The current instruction.
3326 */
3327DECL_HIDDEN_THROW(uint32_t)
3328iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3329{
3330 RT_NOREF(idxInstr);
3331
3332 /*
3333 * Make sure we don't have any outstanding guest register writes as we may
3334 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3335 */
3336 off = iemNativeRegFlushPendingWrites(pReNative, off);
3337
3338#ifdef RT_ARCH_AMD64
3339 /*
3340 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3341 * return raisexcpt();
3342 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3343 */
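/* Illustrative arithmetic: for the canonical address 0x0000_7fff_ffff_ffff the
   high dword is 0x0000_7fff; adding 0x8000 gives 0x0000_ffff and shifting right
   by 16 yields 0, so no exception. For the non-canonical 0x0000_8000_0000_0000
   the high dword is 0x0000_8000; adding 0x8000 gives 0x0001_0000 and the shift
   yields 1, taking the #GP(0) path. For high-half canonical addresses the
   32-bit addition wraps back below 0x1_0000, so they pass as well. */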
3344 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3345
3346 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3347 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3348 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3349 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3350
3351# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3352 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3353# else
3354 uint32_t const offFixup = off;
3355 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3356 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3357 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3358 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3359# endif
3360
3361 iemNativeRegFreeTmp(pReNative, iTmpReg);
3362
3363#elif defined(RT_ARCH_ARM64)
3364 /*
3365 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3366 * return raisexcpt();
3367 * ----
3368 * mov x1, 0x800000000000
3369 * add x1, x0, x1
3370 * cmp xzr, x1, lsr 48
3371 * and either:
3372 * b.ne .Lraisexcpt
3373 * or:
3374 * b.eq .Lnoexcept
3375 * movz x1, #instruction-number
3376 * b .Lraisexcpt
3377 * .Lnoexcept:
3378 */
3379 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3380
3381 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3382 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3383 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3384
3385# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3386 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3387# else
3388 uint32_t const offFixup = off;
3389 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3390 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3391 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3392 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3393# endif
3394
3395 iemNativeRegFreeTmp(pReNative, iTmpReg);
3396
3397#else
3398# error "Port me"
3399#endif
3400 return off;
3401}
3402
3403
3404/**
3405 * Emits code to check if the content of @a idxAddrReg is within the limit of
3406 * idxSegReg, raising a \#GP(0) if it isn't.
3407 *
3408 * @returns New code buffer offset; throws VBox status code on error.
3409 * @param pReNative The native recompile state.
3410 * @param off The code buffer offset.
3411 * @param idxAddrReg The host register (32-bit) with the address to
3412 * check.
3413 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3414 * against.
3415 * @param idxInstr The current instruction.
3416 */
3417DECL_HIDDEN_THROW(uint32_t)
3418iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3419 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3420{
3421 /*
3422 * Make sure we don't have any outstanding guest register writes as we may
3423 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3424 */
3425 off = iemNativeRegFlushPendingWrites(pReNative, off);
3426
3427 /** @todo implement expand down/whatnot checking */
3428 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3429
3430 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3431 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3432 kIemNativeGstRegUse_ForUpdate);
3433
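/* Unsigned compare of the address against the limit: 'above' means out of
   bounds, so the ja (or the inverted jbe in the instruction-counting variant)
   ends up on the RaiseGp0 path. */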
3434 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3435
3436#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3437 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3438 RT_NOREF(idxInstr);
3439#else
3440 uint32_t const offFixup = off;
3441 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3442 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3443 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3444 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3445#endif
3446
3447 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3448 return off;
3449}
3450
3451
3452/**
3453 * Emits a call to a CImpl function or something similar.
3454 */
3455static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3456 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3457 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3458{
3459 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3460 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3461
3462 /*
3463 * Load the parameters.
3464 */
3465#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3466 /* Special handling: pass the hidden VBOXSTRICTRC return buffer pointer as the first argument. */
3467 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3468 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3469 if (cAddParams > 0)
3470 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3471 if (cAddParams > 1)
3472 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3473 if (cAddParams > 2)
3474 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3475 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3476
3477#else
3478 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3479 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3480 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3481 if (cAddParams > 0)
3482 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3483 if (cAddParams > 1)
3484 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3485 if (cAddParams > 2)
3486# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3487 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3488# else
3489 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3490# endif
3491#endif
3492
3493 /*
3494 * Make the call.
3495 */
3496 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3497
3498#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3499 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3500#endif
3501
3502 /*
3503 * Check the status code.
3504 */
3505 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3506}
3507
3508
3509/**
3510 * Emits a call to a threaded worker function.
3511 */
3512static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3513{
3514 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3515 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3516 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3517
3518#ifdef RT_ARCH_AMD64
3519 /* Load the parameters and emit the call. */
3520# ifdef RT_OS_WINDOWS
3521# ifndef VBOXSTRICTRC_STRICT_ENABLED
3522 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3523 if (cParams > 0)
3524 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3525 if (cParams > 1)
3526 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3527 if (cParams > 2)
3528 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3529# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3530 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3531 if (cParams > 0)
3532 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3533 if (cParams > 1)
3534 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3535 if (cParams > 2)
3536 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3537 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3538 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3539# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3540# else
3541 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3542 if (cParams > 0)
3543 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3544 if (cParams > 1)
3545 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3546 if (cParams > 2)
3547 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3548# endif
3549
3550 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3551
3552# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3553 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3554# endif
3555
3556#elif RT_ARCH_ARM64
3557 /*
3558 * ARM64:
3559 */
3560 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3561 if (cParams > 0)
3562 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3563 if (cParams > 1)
3564 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3565 if (cParams > 2)
3566 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3567
3568 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3569
3570#else
3571# error "port me"
3572#endif
3573
3574 /*
3575 * Check the status code.
3576 */
3577 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3578
3579 return off;
3580}
3581
3582
3583/**
3584 * Emits the code at the RaiseGP0 label.
3585 */
3586static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3587{
3588 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3589 if (idxLabel != UINT32_MAX)
3590 {
3591 iemNativeLabelDefine(pReNative, idxLabel, off);
3592
3593 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3594 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3595#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3596 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3597#endif
3598 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3599
3600 /* jump back to the return sequence. */
3601 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3602 }
3603 return off;
3604}
3605
3606
3607/**
3608 * Emits the code at the ReturnWithFlags label (returns
3609 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3610 */
3611static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3612{
3613 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3614 if (idxLabel != UINT32_MAX)
3615 {
3616 iemNativeLabelDefine(pReNative, idxLabel, off);
3617
3618 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3619
3620 /* jump back to the return sequence. */
3621 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3622 }
3623 return off;
3624}
3625
3626
3627/**
3628 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3629 */
3630static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3631{
3632 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3633 if (idxLabel != UINT32_MAX)
3634 {
3635 iemNativeLabelDefine(pReNative, idxLabel, off);
3636
3637 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3638
3639 /* jump back to the return sequence. */
3640 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3641 }
3642 return off;
3643}
3644
3645
3646/**
3647 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3648 */
3649static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3650{
3651 /*
3652 * Generate the rc + rcPassUp fiddling code if needed.
3653 */
3654 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3655 if (idxLabel != UINT32_MAX)
3656 {
3657 iemNativeLabelDefine(pReNative, idxLabel, off);
3658
3659 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3660#ifdef RT_ARCH_AMD64
3661# ifdef RT_OS_WINDOWS
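/* Windows x64 convention: pVCpu goes in rcx, the status code (eax) in rdx and,
   when instruction counting is enabled, idxInstr (from cl) in r8. */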
3662# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3663 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3664# endif
3665 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3666 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3667# else
3668 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3669 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3670# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3671 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3672# endif
3673# endif
3674# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3675 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3676# endif
3677
3678#else
3679 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3680 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3681 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3682#endif
3683
3684 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3685 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3686 }
3687 return off;
3688}
3689
3690
3691/**
3692 * Emits a standard epilog.
3693 */
3694static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3695{
3696 *pidxReturnLabel = UINT32_MAX;
3697
3698 /*
3699 * Successful return, so clear the return register (eax, w0).
3700 */
3701 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3702
3703 /*
3704 * Define label for common return point.
3705 */
3706 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3707 *pidxReturnLabel = idxReturn;
3708
3709 /*
3710 * Restore registers and return.
3711 */
3712#ifdef RT_ARCH_AMD64
3713 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3714
3715 /* Reposition rsp at the r15 restore point. */
3716 pbCodeBuf[off++] = X86_OP_REX_W;
3717 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3718 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3719 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3720
3721 /* Pop non-volatile registers and return */
3722 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3723 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3724 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3725 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3726 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3727 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3728 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3729 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3730# ifdef RT_OS_WINDOWS
3731 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3732 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3733# endif
3734 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3735 pbCodeBuf[off++] = 0xc9; /* leave */
3736 pbCodeBuf[off++] = 0xc3; /* ret */
3737 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3738
3739#elif RT_ARCH_ARM64
3740 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3741
3742 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3743 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3744 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3745 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3746 IEMNATIVE_FRAME_VAR_SIZE / 8);
3747 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3748 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3749 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3750 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3751 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3752 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3753 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3754 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3755 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3756 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3757 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3758 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3759
3760 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3761 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3762 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3763 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3764
3765 /* retab / ret */
3766# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3767 if (1)
3768 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3769 else
3770# endif
3771 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3772
3773#else
3774# error "port me"
3775#endif
3776 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3777
3778 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3779}
3780
3781
3782/**
3783 * Emits a standard prolog.
3784 */
3785static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3786{
3787#ifdef RT_ARCH_AMD64
3788 /*
3789 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3790 * reserving 64 bytes for stack variables plus 4 non-register argument
3791 * slots. Fixed register assignment: xBX = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU);
3792 *
3793 * Since we always do the same register spilling, we can use the same
3794 * unwind description for all the code.
3795 */
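/* Resulting frame layout on Windows hosts (illustrative; non-Windows builds
   omit the rsi/rdi saves, so the r12-r15 slots move up by 16 bytes):
       rbp+08h: return address    rbp+00h: saved rbp
       rbp-08h: rbx   rbp-10h: rsi   rbp-18h: rdi
       rbp-20h: r12   rbp-28h: r13   rbp-30h: r14   rbp-38h: r15
   followed by the variable area, stack argument slots and shadow space. */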
3796 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3797 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3798 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3799 pbCodeBuf[off++] = 0x8b;
3800 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3801 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3802 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3803# ifdef RT_OS_WINDOWS
3804 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3805 pbCodeBuf[off++] = 0x8b;
3806 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3807 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3808 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3809# else
3810 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3811 pbCodeBuf[off++] = 0x8b;
3812 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3813# endif
3814 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3815 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3816 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3817 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3818 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3819 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3820 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3821 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3822
3823 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3824 X86_GREG_xSP,
3825 IEMNATIVE_FRAME_ALIGN_SIZE
3826 + IEMNATIVE_FRAME_VAR_SIZE
3827 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3828 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3829 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3830 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3831 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3832
3833#elif RT_ARCH_ARM64
3834 /*
3835 * We set up a stack frame exactly like on x86, only we have to push the
3836 * return address ourselves here. We save all non-volatile registers.
3837 */
3838 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3839
3840 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
3841 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
3842 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
3843 * in any way conditional, so just emit this instruction now and hope for the best... */
3844 /* pacibsp */
3845 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3846# endif
3847
3848 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
3849 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3850 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3851 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3852 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3853 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3854 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3855 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3856 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3857 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3858 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3859 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3860 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3861 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3862 /* Save the BP and LR (ret address) registers at the top of the frame. */
3863 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3864 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3865 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3866 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3867 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3868 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3869
3870 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3871 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3872
3873 /* mov r28, r0 */
3874 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3875 /* mov r27, r1 */
3876 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3877
3878#else
3879# error "port me"
3880#endif
3881 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3882 return off;
3883}
3884
3885
3886
3887
3888/*********************************************************************************************************************************
3889* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
3890*********************************************************************************************************************************/
3891
3892#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3893 { \
3894 pReNative->fMc = (a_fMcFlags); \
3895 pReNative->fCImpl = (a_fCImplFlags); \
3896 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
3897
3898/** We have to get to the end in recompilation mode, as otherwise we won't
3899 * generate code for all the IEM_MC_IF_XXX branches. */
3900#define IEM_MC_END() \
3901 } return off
3902
3903
3904
3905/*********************************************************************************************************************************
3906* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
3907*********************************************************************************************************************************/
3908
3909#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3910 pReNative->fMc = 0; \
3911 pReNative->fCImpl = (a_fFlags); \
3912 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3913
3914
3915#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3916 pReNative->fMc = 0; \
3917 pReNative->fCImpl = (a_fFlags); \
3918 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3919
3920DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3921 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3922{
3923 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3924}
3925
3926
3927#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3928 pReNative->fMc = 0; \
3929 pReNative->fCImpl = (a_fFlags); \
3930 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3931
3932DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3933 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3934{
3935 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3936}
3937
3938
3939#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3940 pReNative->fMc = 0; \
3941 pReNative->fCImpl = (a_fFlags); \
3942 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3943
3944DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3945 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
3946 uint64_t uArg2)
3947{
3948 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3949}
3950
3951
3952
3953/*********************************************************************************************************************************
3954* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
3955*********************************************************************************************************************************/
3956
3957/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
3958 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
3959DECL_INLINE_THROW(uint32_t)
3960iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3961{
3962 /*
3963 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
3964 * return with a special status code and make the execution loop deal with
3965 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
3966 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
3967 * could continue w/o interruption, it probably will drop into the
3968 * debugger, so it's not worth the effort of trying to service it here; we
3969 * just lump it in with the handling of the others.
3970 *
3971 * To simplify the code and the register state management even more (wrt
3972 * the immediate in the AND operation), we always update the flags and skip
3973 * the conditional jump associated with the extra check.
3974 */
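/* Roughly, the sequence emitted below is (illustrative):
       eflags = <guest EFLAGS shadow>;
       if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
           goto ReturnWithFlags;
       eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
       <store eflags back into cpum.GstCtx.eflags>; */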
3975 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
3976 <= UINT32_MAX);
3977 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3978 kIemNativeGstRegUse_ForUpdate);
3979 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
3980 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
3981 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
3982 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
3983 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
3984
3985 /* Free but don't flush the EFLAGS register. */
3986 iemNativeRegFreeTmp(pReNative, idxEflReg);
3987
3988 return off;
3989}
3990
3991
3992#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
3993 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
3994
3995#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
3996 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
3997 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
3998
3999/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4000DECL_INLINE_THROW(uint32_t)
4001iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4002{
4003 /* Allocate a temporary PC register. */
4004 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4005
4006 /* Perform the addition and store the result. */
4007 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4008 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4009
4010 /* Free but don't flush the PC register. */
4011 iemNativeRegFreeTmp(pReNative, idxPcReg);
4012
4013 return off;
4014}
4015
4016
4017#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4018 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4019
4020#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4021 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4022 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4023
4024/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4025DECL_INLINE_THROW(uint32_t)
4026iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4027{
4028 /* Allocate a temporary PC register. */
4029 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4030
4031 /* Perform the addition and store the result. */
4032 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4033 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4034
4035 /* Free but don't flush the PC register. */
4036 iemNativeRegFreeTmp(pReNative, idxPcReg);
4037
4038 return off;
4039}
4040
4041
4042#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4043 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4044
4045#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4046 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4047 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4048
4049/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4050DECL_INLINE_THROW(uint32_t)
4051iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4052{
4053 /* Allocate a temporary PC register. */
4054 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4055
4056 /* Perform the addition and store the result. */
4057 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4058 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4059 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4060
4061 /* Free but don't flush the PC register. */
4062 iemNativeRegFreeTmp(pReNative, idxPcReg);
4063
4064 return off;
4065}
4066
4067
4068
4069/*********************************************************************************************************************************
4070* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4071*********************************************************************************************************************************/
4072
4073#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4074 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4075 (a_enmEffOpSize), pCallEntry->idxInstr)
4076
4077#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4078 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4079 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4080
4081#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4082 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4083 IEMMODE_16BIT, pCallEntry->idxInstr)
4084
4085#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4086 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4087 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4088
4089#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4091 IEMMODE_64BIT, pCallEntry->idxInstr)
4092
4093#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4094 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4095 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4096
4097/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4098 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4099 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4100DECL_INLINE_THROW(uint32_t)
4101iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4102 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4103{
4104 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4105
4106 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4107 off = iemNativeRegFlushPendingWrites(pReNative, off);
4108
4109 /* Allocate a temporary PC register. */
4110 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4111
4112 /* Perform the addition. */
4113 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4114
4115 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4116 {
4117 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4118 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4119 }
4120 else
4121 {
4122 /* Just truncate the result to 16-bit IP. */
4123 Assert(enmEffOpSize == IEMMODE_16BIT);
4124 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4125 }
4126 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4127
4128 /* Free but don't flush the PC register. */
4129 iemNativeRegFreeTmp(pReNative, idxPcReg);
4130
4131 return off;
4132}
4133
4134
4135#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4136 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4137 (a_enmEffOpSize), pCallEntry->idxInstr)
4138
4139#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4140 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4141 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4142
4143#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4144 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4145 IEMMODE_16BIT, pCallEntry->idxInstr)
4146
4147#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4148 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4149 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4150
4151#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4152 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4153 IEMMODE_32BIT, pCallEntry->idxInstr)
4154
4155#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4156 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4157 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4158
4159/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4160 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4161 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4162DECL_INLINE_THROW(uint32_t)
4163iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4164 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4165{
4166 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4167
4168 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4169 off = iemNativeRegFlushPendingWrites(pReNative, off);
4170
4171 /* Allocate a temporary PC register. */
4172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4173
4174 /* Perform the addition. */
4175 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4176
4177 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4178 if (enmEffOpSize == IEMMODE_16BIT)
4179 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4180
4181 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4182 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4183
4184 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4185
4186 /* Free but don't flush the PC register. */
4187 iemNativeRegFreeTmp(pReNative, idxPcReg);
4188
4189 return off;
4190}
4191
4192
4193#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4194 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4195
4196#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4197 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4198 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4199
4200#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4201 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4202
4203#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4204 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4205 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4206
4207#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4208 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4209
4210#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4211 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4212 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4213
4214/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4215DECL_INLINE_THROW(uint32_t)
4216iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4217 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4218{
4219 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4220 off = iemNativeRegFlushPendingWrites(pReNative, off);
4221
4222 /* Allocate a temporary PC register. */
4223 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4224
4225 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4226 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4227 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4228 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4229 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4230
4231 /* Free but don't flush the PC register. */
4232 iemNativeRegFreeTmp(pReNative, idxPcReg);
4233
4234 return off;
4235}
4236
4237
4238
4239/*********************************************************************************************************************************
4240* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4241*********************************************************************************************************************************/
4242
4243/**
4244 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4245 *
4246 * @returns Pointer to the condition stack entry on success.
4247 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
4248 */
4249DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4250{
4251 uint32_t const idxStack = pReNative->cCondDepth;
4252 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4253
4254 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4255 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4256
4257 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4258 pEntry->fInElse = false;
4259 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4260 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4261
4262 return pEntry;
4263}
4264
4265
4266/**
4267 * Start of the if-block, snapshotting the register and variable state.
4268 */
4269DECL_INLINE_THROW(void)
4270iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4271{
4272 Assert(offIfBlock != UINT32_MAX);
4273 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4274 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4275 Assert(!pEntry->fInElse);
4276
4277    /* Define the start of the IF block if requested or for disassembly purposes. */
4278 if (idxLabelIf != UINT32_MAX)
4279 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4280#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4281 else
4282 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4283#else
4284 RT_NOREF(offIfBlock);
4285#endif
4286
4287 /* Copy the initial state so we can restore it in the 'else' block. */
4288 pEntry->InitialState = pReNative->Core;
4289}
4290
4291
4292#define IEM_MC_ELSE() } while (0); \
4293 off = iemNativeEmitElse(pReNative, off); \
4294 do {
4295
4296/** Emits code related to IEM_MC_ELSE. */
4297DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4298{
4299 /* Check sanity and get the conditional stack entry. */
4300 Assert(off != UINT32_MAX);
4301 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4302 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4303 Assert(!pEntry->fInElse);
4304
4305 /* Jump to the endif */
4306 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4307
4308 /* Define the else label and enter the else part of the condition. */
4309 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4310 pEntry->fInElse = true;
4311
4312 /* Snapshot the core state so we can do a merge at the endif and restore
4313 the snapshot we took at the start of the if-block. */
4314 pEntry->IfFinalState = pReNative->Core;
4315 pReNative->Core = pEntry->InitialState;
4316
4317 return off;
4318}
4319
4320
4321#define IEM_MC_ENDIF() } while (0); \
4322 off = iemNativeEmitEndIf(pReNative, off)
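/* Note on the macro structure: IEM_MC_IF_XXX ends with an opening 'do {',
   IEM_MC_ELSE() closes it and opens another one, and IEM_MC_ENDIF() closes the
   last one.  The C block structure in the IEM_MC source therefore mirrors the
   if/else/endif branch structure emitted into the native code, roughly like
   this (an illustrative sketch, not taken from any particular instruction):

       IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
           ... true-branch MC statements ...
       } IEM_MC_ELSE() {
           ... false-branch MC statements ...
       } IEM_MC_ENDIF();
*/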
4323
4324/** Emits code related to IEM_MC_ENDIF. */
4325DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4326{
4327 /* Check sanity and get the conditional stack entry. */
4328 Assert(off != UINT32_MAX);
4329 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4330 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4331
4332 /*
4333     * Now we have to find common ground between the current core state and
4334     * the one at the end of the if-block. Use the smallest common denominator
4335     * and just drop anything that isn't the same in both states.
4336 */
4337 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4338 * which is why we're doing this at the end of the else-block.
4339     * But we'd need more info about the future for that to be worth the effort. */
4340 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4341 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4342 {
4343 /* shadow guest stuff first. */
4344 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4345 if (fGstRegs)
4346 {
4347 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4348 do
4349 {
4350 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4351 fGstRegs &= ~RT_BIT_64(idxGstReg);
4352
4353 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4354 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4355 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4356 {
4357                    Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
4358                           g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4359 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4360 }
4361 } while (fGstRegs);
4362 }
4363 else
4364 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4365
4366 /* Check variables next. For now we must require them to be identical
4367 or stuff we can recreate. */
4368 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4369 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4370 if (fVars)
4371 {
4372 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4373 do
4374 {
4375 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4376 fVars &= ~RT_BIT_32(idxVar);
4377
4378 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4379 {
4380 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4381 continue;
4382 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4383 {
4384 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4385 if (idxHstReg != UINT8_MAX)
4386 {
4387 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4388 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4389 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4390 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4391 }
4392 continue;
4393 }
4394 }
4395 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4396 continue;
4397
4398 /* Irreconcilable, so drop it. */
4399 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4400 if (idxHstReg != UINT8_MAX)
4401 {
4402 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4403 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4404 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4405 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4406 }
4407 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4408 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4409 } while (fVars);
4410 }
4411
4412 /* Finally, check that the host register allocations matches. */
4413 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4414 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4415 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4416 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4417 }
4418
4419 /*
4420 * Define the endif label and maybe the else one if we're still in the 'if' part.
4421 */
4422 if (!pEntry->fInElse)
4423 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4424 else
4425 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4426 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4427
4428    /* Pop the condition stack. */
4429 pReNative->cCondDepth -= 1;
4430
4431 return off;
4432}
4433
4434
4435#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4436 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4437 do {
4438
4439/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4440DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4441{
4442 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4443
4444 /* Get the eflags. */
4445 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4446 kIemNativeGstRegUse_ReadOnly);
4447
4448 /* Test and jump. */
4449 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4450
4451 /* Free but don't flush the EFlags register. */
4452 iemNativeRegFreeTmp(pReNative, idxEflReg);
4453
4454 /* Make a copy of the core state now as we start the if-block. */
4455 iemNativeCondStartIfBlock(pReNative, off);
4456
4457 return off;
4458}
4459
4460
4461#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4462 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4463 do {
4464
4465/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4466DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4467{
4468 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4469
4470 /* Get the eflags. */
4471 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4472 kIemNativeGstRegUse_ReadOnly);
4473
4474 /* Test and jump. */
4475 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4476
4477 /* Free but don't flush the EFlags register. */
4478 iemNativeRegFreeTmp(pReNative, idxEflReg);
4479
4480 /* Make a copy of the core state now as we start the if-block. */
4481 iemNativeCondStartIfBlock(pReNative, off);
4482
4483 return off;
4484}
4485
4486
4487#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4488 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4489 do {
4490
4491/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4492DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4493{
4494 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4495
4496 /* Get the eflags. */
4497 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4498 kIemNativeGstRegUse_ReadOnly);
4499
4500 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4501 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4502
4503 /* Test and jump. */
4504 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4505
4506 /* Free but don't flush the EFlags register. */
4507 iemNativeRegFreeTmp(pReNative, idxEflReg);
4508
4509 /* Make a copy of the core state now as we start the if-block. */
4510 iemNativeCondStartIfBlock(pReNative, off);
4511
4512 return off;
4513}
4514
4515
4516#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4517 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4518 do {
4519
4520/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4521DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4522{
4523 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4524
4525 /* Get the eflags. */
4526 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4527 kIemNativeGstRegUse_ReadOnly);
4528
4529 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4530 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4531
4532 /* Test and jump. */
4533 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4534
4535 /* Free but don't flush the EFlags register. */
4536 iemNativeRegFreeTmp(pReNative, idxEflReg);
4537
4538 /* Make a copy of the core state now as we start the if-block. */
4539 iemNativeCondStartIfBlock(pReNative, off);
4540
4541 return off;
4542}
4543
4544
4545#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4546 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4547 do {
4548
4549#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4550 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4551 do {
4552
4553/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4554DECL_INLINE_THROW(uint32_t)
4555iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4556 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4557{
4558 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4559
4560 /* Get the eflags. */
4561 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4562 kIemNativeGstRegUse_ReadOnly);
4563
4564 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4565 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4566
4567 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4568 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4569 Assert(iBitNo1 != iBitNo2);
4570
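    /* Strategy (both architectures): isolate the first flag bit, shift it onto
       the second flag's bit position and XOR the result with EFLAGS; bit
       iBitNo2 of the temporary register is then set exactly when the two
       flags differ. */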
4571#ifdef RT_ARCH_AMD64
4572 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4573
4574 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4575 if (iBitNo1 > iBitNo2)
4576 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4577 else
4578 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4579 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4580
4581#elif defined(RT_ARCH_ARM64)
4582 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4583 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4584
4585 /* and tmpreg, eflreg, #1<<iBitNo1 */
4586 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4587
4588 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4589 if (iBitNo1 > iBitNo2)
4590 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4591 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4592 else
4593 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4594 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4595
4596 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4597
4598#else
4599# error "Port me"
4600#endif
4601
4602 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4603 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4604 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4605
4606 /* Free but don't flush the EFlags and tmp registers. */
4607 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4608 iemNativeRegFreeTmp(pReNative, idxEflReg);
4609
4610 /* Make a copy of the core state now as we start the if-block. */
4611 iemNativeCondStartIfBlock(pReNative, off);
4612
4613 return off;
4614}
4615
4616
4617#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4618 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4619 do {
4620
4621#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4622 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4623 do {
4624
4625/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4626 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4627DECL_INLINE_THROW(uint32_t)
4628iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4629 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4630{
4631 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4632
4633 /* We need an if-block label for the non-inverted variant. */
4634 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4635 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4636
4637 /* Get the eflags. */
4638 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4639 kIemNativeGstRegUse_ReadOnly);
4640
4641 /* Translate the flag masks to bit numbers. */
4642 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4643 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4644
4645 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4646 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4647 Assert(iBitNo1 != iBitNo);
4648
4649 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4650 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4651 Assert(iBitNo2 != iBitNo);
4652 Assert(iBitNo2 != iBitNo1);
4653
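    /* The two-bit comparison below uses the same isolate-shift-xor trick as
       iemNativeEmitIfEflagsTwoBitsEqual; it is preceded by a test of the lone
       bit that can branch straight to the else-block (or the if-block for the
       inverted variant). */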
4654#ifdef RT_ARCH_AMD64
4655 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4656#elif defined(RT_ARCH_ARM64)
4657 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4658#endif
4659
4660 /* Check for the lone bit first. */
4661 if (!fInverted)
4662 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4663 else
4664 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4665
4666 /* Then extract and compare the other two bits. */
4667#ifdef RT_ARCH_AMD64
4668 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4669 if (iBitNo1 > iBitNo2)
4670 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4671 else
4672 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4673 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4674
4675#elif defined(RT_ARCH_ARM64)
4676 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4677
4678 /* and tmpreg, eflreg, #1<<iBitNo1 */
4679 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4680
4681 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4682 if (iBitNo1 > iBitNo2)
4683 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4684 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4685 else
4686 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4687 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4688
4689 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4690
4691#else
4692# error "Port me"
4693#endif
4694
4695 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4696 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4697 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4698
4699 /* Free but don't flush the EFlags and tmp registers. */
4700 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4701 iemNativeRegFreeTmp(pReNative, idxEflReg);
4702
4703 /* Make a copy of the core state now as we start the if-block. */
4704 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4705
4706 return off;
4707}
4708
4709
4710#define IEM_MC_IF_CX_IS_NZ() \
4711 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4712 do {
4713
4714/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4715DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4716{
4717 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4718
4719 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4720 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4721 kIemNativeGstRegUse_ReadOnly);
4722 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4723 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4724
4725 iemNativeCondStartIfBlock(pReNative, off);
4726 return off;
4727}
4728
4729
4730#define IEM_MC_IF_ECX_IS_NZ() \
4731 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4732 do {
4733
4734#define IEM_MC_IF_RCX_IS_NZ() \
4735 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4736 do {
4737
4738/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4739DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4740{
4741 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4742
4743 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4744 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4745 kIemNativeGstRegUse_ReadOnly);
4746 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4747 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4748
4749 iemNativeCondStartIfBlock(pReNative, off);
4750 return off;
4751}
4752
4753
4754#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4755 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4756 do {
4757
4758#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4759 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4760 do {
4761
4762/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4763DECL_INLINE_THROW(uint32_t)
4764iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4765{
4766 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4767
4768 /* We have to load both RCX and EFLAGS before we can start branching,
4769 otherwise we'll end up in the else-block with an inconsistent
4770 register allocator state.
4771 Doing EFLAGS first as it's more likely to be loaded, right? */
4772 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4773 kIemNativeGstRegUse_ReadOnly);
4774 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4775 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4776 kIemNativeGstRegUse_ReadOnly);
4777
4778 /** @todo we could reduce this to a single branch instruction by spending a
4779 * temporary register and some setnz stuff. Not sure if loops are
4780 * worth it. */
4781 /* Check CX. */
4782 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4783
4784 /* Check the EFlags bit. */
4785 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4786 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4787 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4788 !fCheckIfSet /*fJmpIfSet*/);
4789
4790 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4791 iemNativeRegFreeTmp(pReNative, idxEflReg);
4792
4793 iemNativeCondStartIfBlock(pReNative, off);
4794 return off;
4795}
4796
4797
4798#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4799 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
4800 do {
4801
4802#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4803 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
4804 do {
4805
4806#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4807 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
4808 do {
4809
4810#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4811 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
4812 do {
4813
4814/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
4815 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
4816 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
4817 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4818DECL_INLINE_THROW(uint32_t)
4819iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4820 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
4821{
4822 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4823
4824 /* We have to load both RCX and EFLAGS before we can start branching,
4825 otherwise we'll end up in the else-block with an inconsistent
4826 register allocator state.
4827 Doing EFLAGS first as it's more likely to be loaded, right? */
4828 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4829 kIemNativeGstRegUse_ReadOnly);
4830 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4831 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4832 kIemNativeGstRegUse_ReadOnly);
4833
4834 /** @todo we could reduce this to a single branch instruction by spending a
4835 * temporary register and some setnz stuff. Not sure if loops are
4836 * worth it. */
4837 /* Check RCX/ECX. */
4838 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4839
4840 /* Check the EFlags bit. */
4841 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4842 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4843 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4844 !fCheckIfSet /*fJmpIfSet*/);
4845
4846 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4847 iemNativeRegFreeTmp(pReNative, idxEflReg);
4848
4849 iemNativeCondStartIfBlock(pReNative, off);
4850 return off;
4851}
4852
4853
4854
4855/*********************************************************************************************************************************
4856* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
4857*********************************************************************************************************************************/
4858/** Number of hidden arguments for CIMPL calls.
4859 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
4860#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4861# define IEM_CIMPL_HIDDEN_ARGS 3
4862#else
4863# define IEM_CIMPL_HIDDEN_ARGS 2
4864#endif
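/* The hidden arguments are pVCpu and cbInstr; in the Windows/AMD64 strict
   VBOXSTRICTRC configuration a third hidden argument carries the pointer to
   the rcStrict return buffer (see iemNativeEmitCallCImplCommon below). */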
4865
4866#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
4867 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
4868
4869#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
4870 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
4871
4872#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
4873 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
4874
4875#define IEM_MC_LOCAL(a_Type, a_Name) \
4876 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
4877
4878#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
4879 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
4880
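/* Note: in the native recompiler the a_Name declared by these macros is just a
   uint8_t index into pReNative->Core.aVars, not an actual C variable of type
   a_Type; only the size is recorded, via sizeof(a_Type). */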
4881
4882/**
4883 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
4884 */
4885DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
4886{
4887 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
4888 return IEM_CIMPL_HIDDEN_ARGS;
4889 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
4890 return 1;
4891 return 0;
4892}
4893
4894
4895/**
4896 * Internal work that allocates a variable with kind set to
4897 * kIemNativeVarKind_Invalid and no current stack allocation.
4898 *
4899 * The kind will either be set by the caller or later when the variable is first
4900 * assigned a value.
4901 */
4902static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
4903{
4904 Assert(cbType > 0 && cbType <= 64);
4905 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
4906 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
4907 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
4908 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
4909 pReNative->Core.aVars[idxVar].cbVar = cbType;
4910 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
4911 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4912 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
4913 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
4914 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
4915 pReNative->Core.aVars[idxVar].u.uValue = 0;
4916 return idxVar;
4917}
4918
4919
4920/**
4921 * Internal work that allocates an argument variable w/o setting enmKind.
4922 */
4923static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
4924{
4925 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
4926 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
4927 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
4928
4929 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
4930 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
4931 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
4932 return idxVar;
4933}
4934
4935
4936/**
4937 * Changes the variable to a stack variable.
4938 *
4939 * Currently this is only possible to do the first time the variable is used;
4940 * switching later can be implemented but hasn't been done.
4941 *
4942 * @param pReNative The recompiler state.
4943 * @param idxVar The variable.
4944 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
4945 */
4946static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4947{
4948 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
4949 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4950 {
4951 /* We could in theory transition from immediate to stack as well, but it
4952 would involve the caller doing work storing the value on the stack. So,
4953 till that's required we only allow transition from invalid. */
4954 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
4955 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
4956 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
4957
4958 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
4959 {
4960 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
4961 {
4962 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
4963 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4964 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
4965 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
4966 return;
4967 }
4968 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;*/
4969 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
4970 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
4971 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
4972 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
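            /* Example: a 32 byte variable yields fBitAlignMask=3 (4-slot aligned)
               and fBitAllocMask=0xf (four consecutive 8-byte stack slots). */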
4973 uint32_t bmStack = ~pReNative->Core.bmStack;
4974 while (bmStack != UINT32_MAX)
4975 {
4976 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
4977 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4978 if (!(iSlot & fBitAlignMask))
4979 {
4980 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
4981 {
4982 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
4983 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
4984 return;
4985 }
4986 }
4987 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
4988 }
4989 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
4990 }
4991 }
4992}
4993
4994
4995/**
4996 * Changes it to a variable with a constant value.
4997 *
4998 * This does not require stack storage as we know the value and can always
4999 * reload it, unless of course it's referenced.
5000 *
5001 * @param pReNative The recompiler state.
5002 * @param idxVar The variable.
5003 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5004 */
5005static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5006{
5007 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5008 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5009 {
5010        /* Only simple transitions for now. */
5011 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5012 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5013 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5014 }
5015 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5016}
5017
5018
5019/**
5020 * Changes the variable to a reference (pointer) to @a idxOtherVar.
5021 *
5022 * @param pReNative The recompiler state.
5023 * @param idxVar The variable.
5024 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5025 */
5026static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5027{
5028 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5029 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5030
5031 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5032 {
5033        /* Only simple transitions for now. */
5034 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5035 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5036        pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5037 }
5038 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5039
5040 /* Update the other variable, ensure it's a stack variable. */
5041    /** @todo handle variables with const values... that will go boom now. */
5042 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5043 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5044}
5045
5046
5047DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5048{
5049 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5050}
5051
5052
5053DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5054{
5055 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5056 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5057 return idxVar;
5058}
5059
5060
5061DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5062{
5063 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5064 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5065 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5066 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5067
5068 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5069 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5070 return idxArgVar;
5071}
5072
5073
5074DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5075{
5076 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5077 iemNativeVarSetKindToStack(pReNative, idxVar);
5078 return idxVar;
5079}
5080
5081
5082DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5083{
5084 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5085 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5086 return idxVar;
5087}
5088
5089
5090/**
5091 * Makes sure variable @a idxVar has a register assigned to it.
5092 *
5093 * @returns The host register number.
5094 * @param pReNative The recompiler state.
5095 * @param idxVar The variable.
5096 * @param poff Pointer to the instruction buffer offset.
5097 * In case a register needs to be freed up.
5098 */
5099DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5100{
5101 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5102
5103 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5104 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5105 return idxReg;
5106
5107 /*
5108     * We have to allocate a register for the variable, even if it's a stack one,
5109     * as we don't know whether there are modifications being made to it before
5110     * it's finalized (todo: analyze and insert hints about that?).
5111     *
5112     * If we can, we try to get the correct register for argument variables. This
5113     * is assuming that most argument variables are fetched as close as possible
5114     * to the actual call, so that there aren't any interfering hidden calls
5115     * (memory accesses, etc.) in between.
5116     *
5117     * If we cannot, or it's a local variable, we make sure no argument registers
5118     * that will be used by this MC block will be allocated here, and we always
5119     * prefer non-volatile registers to avoid needing to spill stuff for internal
5120     * calls.
5121 */
5122 /** @todo Detect too early argument value fetches and warn about hidden
5123 * calls causing less optimal code to be generated in the python script. */
5124
5125 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5126 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5127 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5128 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5129 else
5130 {
5131 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5132 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5133 & ~pReNative->Core.bmHstRegsWithGstShadow
5134 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5135 & fNotArgsMask;
5136 if (fRegs)
5137 {
5138            /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
5139 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5140 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5141 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5142 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5143 }
5144 else
5145 {
5146 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5147 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5148 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5149 }
5150 }
5151 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5152 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5153 return idxReg;
5154}
5155
5156
5157
5158/*********************************************************************************************************************************
5159* Emitters for IEM_MC_CALL_CIMPL_XXX *
5160*********************************************************************************************************************************/
5161
5162/**
5163 * Emits code to load a reference to the given guest register into @a idxGprDst.
5164 */
5165DECL_INLINE_THROW(uint32_t)
5166iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5167 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5168{
5169 /*
5170 * Get the offset relative to the CPUMCTX structure.
5171 */
5172 uint32_t offCpumCtx;
5173 switch (enmClass)
5174 {
5175 case kIemNativeGstRegRef_Gpr:
5176 Assert(idxRegInClass < 16);
5177 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5178 break;
5179
5180 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
5181 Assert(idxRegInClass < 4);
5182 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5183 break;
5184
5185 case kIemNativeGstRegRef_EFlags:
5186 Assert(idxRegInClass == 0);
5187 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5188 break;
5189
5190 case kIemNativeGstRegRef_MxCsr:
5191 Assert(idxRegInClass == 0);
5192 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5193 break;
5194
5195 case kIemNativeGstRegRef_FpuReg:
5196 Assert(idxRegInClass < 8);
5197 AssertFailed(); /** @todo what kind of indexing? */
5198 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5199 break;
5200
5201 case kIemNativeGstRegRef_MReg:
5202 Assert(idxRegInClass < 8);
5203 AssertFailed(); /** @todo what kind of indexing? */
5204 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5205 break;
5206
5207 case kIemNativeGstRegRef_XReg:
5208 Assert(idxRegInClass < 16);
5209 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5210 break;
5211
5212 default:
5213 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5214 }
5215
5216 /*
5217 * Load the value into the destination register.
5218 */
5219#ifdef RT_ARCH_AMD64
5220 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5221
5222#elif defined(RT_ARCH_ARM64)
5223 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5224 Assert(offCpumCtx < 4096);
5225 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5226
5227#else
5228# error "Port me!"
5229#endif
5230
5231 return off;
5232}
5233
5234
5235/**
5236 * Common code for CIMPL and AIMPL calls.
5237 *
5238 * These are calls that use argument variables and such. They should not be
5239 * confused with internal calls required to implement an MC operation,
5240 * like a TLB load and similar.
5241 *
5242 * Upon return all that is left to do is to load any hidden arguments and
5243 * perform the call. All argument variables are freed.
5244 *
5245 * @returns New code buffer offset; throws VBox status code on error.
5246 * @param pReNative The native recompile state.
5247 * @param off The code buffer offset.
5248 * @param cArgs The total number of arguments (includes hidden
5249 * count).
5250 * @param cHiddenArgs The number of hidden arguments. The hidden
5251 * arguments must not have any variable declared for
5252 * them, whereas all the regular arguments must
5253 * (tstIEMCheckMc ensures this).
5254 */
5255DECL_HIDDEN_THROW(uint32_t)
5256iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5257{
5258#ifdef VBOX_STRICT
5259 /*
5260 * Assert sanity.
5261 */
5262 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5263 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5264 for (unsigned i = 0; i < cHiddenArgs; i++)
5265 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5266 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5267 {
5268 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5269 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5270 }
5271#endif
5272
5273 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
5274
5275 /*
5276 * First, go over the host registers that will be used for arguments and make
5277 * sure they either hold the desired argument or are free.
5278 */
5279 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
5280 for (uint32_t i = 0; i < cRegArgs; i++)
5281 {
5282 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5283 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5284 {
5285 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
5286 {
5287 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
5288 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5289 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
5290 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5291 if (uArgNo == i)
5292                    { /* perfect */ }
5293 else
5294 {
5295 /* The variable allocator logic should make sure this is impossible. */
5296 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
5297
5298 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5299 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5300 else
5301 {
5302 /* just free it, can be reloaded if used again */
5303 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5304 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
5305 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
5306 }
5307 }
5308 }
5309 else
5310 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
5311 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
5312 }
5313 }
5314
5315 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
5316
5317 /*
5318 * Make sure the argument variables are loaded into their respective registers.
5319 *
5320 * We can optimize this by ASSUMING that any register allocations are for
5321 * registers that have already been loaded and are ready. The previous step
5322 * saw to that.
5323 */
5324 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
5325 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5326 {
5327 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5328 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5329 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
5330 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
5331 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
5332 else
5333 {
5334 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5335 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5336 {
5337 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5338 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
5339 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
5340 | RT_BIT_32(idxArgReg);
5341 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
5342 }
5343 else
5344 {
5345 /* Use ARG0 as temp for stuff we need registers for. */
5346 switch (pReNative->Core.aVars[idxVar].enmKind)
5347 {
5348 case kIemNativeVarKind_Stack:
5349 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5350 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5351 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg,
5352 IEMNATIVE_FP_OFF_STACK_VARS
5353 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5354 continue;
5355
5356 case kIemNativeVarKind_Immediate:
5357 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
5358 continue;
5359
5360 case kIemNativeVarKind_VarRef:
5361 {
5362 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5363 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5364 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5365 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5366 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
5367 IEMNATIVE_FP_OFF_STACK_VARS
5368 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5369 continue;
5370 }
5371
5372 case kIemNativeVarKind_GstRegRef:
5373 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
5374 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5375 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5376 continue;
5377
5378 case kIemNativeVarKind_Invalid:
5379 case kIemNativeVarKind_End:
5380 break;
5381 }
5382 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5383 }
5384 }
5385 }
5386#ifdef VBOX_STRICT
5387 else
5388 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5389 {
5390 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
5391 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
5392 }
5393#endif
5394
5395#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
5396 /*
5397 * If there are any stack arguments, make sure they are in their place as well.
5398 *
5399 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since the
5400 * caller will load it later and it must be free (see the first loop).
5401 */
5402 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
5403 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
5404 {
5405 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5406 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
5407 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5408 {
5409 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
5410 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
5411 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
5412 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5413 }
5414 else
5415 {
5416 /* Use ARG0 as temp for stuff we need registers for. */
5417 switch (pReNative->Core.aVars[idxVar].enmKind)
5418 {
5419 case kIemNativeVarKind_Stack:
5420 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5421 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5422 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
5423 IEMNATIVE_FP_OFF_STACK_VARS
5424 + pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t));
5425 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5426 continue;
5427
5428 case kIemNativeVarKind_Immediate:
5429 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
5430 continue;
5431
5432 case kIemNativeVarKind_VarRef:
5433 {
5434 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5435 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5436 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5437 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5438 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5439 IEMNATIVE_FP_OFF_STACK_VARS
5440 + pReNative->Core.aVars[idxOtherVar].idxStackSlot * sizeof(uint64_t));
5441 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5442 continue;
5443 }
5444
5445 case kIemNativeVarKind_GstRegRef:
5446 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5447 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5448 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5449 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5450 continue;
5451
5452 case kIemNativeVarKind_Invalid:
5453 case kIemNativeVarKind_End:
5454 break;
5455 }
5456 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5457 }
5458 }
5459#else
5460 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
5461#endif
5462
5463 /*
5464 * Free all argument variables (simplified).
5465 * Their lifetime always expires with the call they are for.
5466 */
5467 /** @todo Make the python script check that arguments aren't used after
5468 * IEM_MC_CALL_XXXX. */
5469 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
5470 {
5471 uint8_t idxVar = pReNative->Core.aidxArgVars[i];
5472 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5473 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
5474 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5475 }
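    /* All argument slots should now be back at UINT8_MAX; u64ArgVars is
       (presumably) the aidxArgVars array viewed as a single uint64_t, so it
       must read all-ones here. */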
5476 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5477
5478 /*
5479 * Flush volatile registers as we make the call.
5480 */
5481 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
5482
5483 return off;
5484}
5485
5486
5487/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
5488DECL_HIDDEN_THROW(uint32_t)
5489iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5490 uintptr_t pfnCImpl, uint8_t cArgs)
5491
5492{
5493 /*
5494 * Do all the call setup and cleanup.
5495 */
5496 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
5497
5498 /*
5499 * Load the two hidden arguments.
5500 */
5501#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5502 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5503 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5504 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
5505#else
5506 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5507 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
5508#endif
5509
5510 /*
5511 * Make the call and check the return code.
5512 */
5513 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
5514#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5515 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5516#endif
5517 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5518}
5519
5520
5521#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
5522 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0)
5523
5524/** Emits code for IEM_MC_CALL_CIMPL_1. */
5525DECL_INLINE_THROW(uint32_t)
5526iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5527 uintptr_t pfnCImpl, uint8_t idxArg0)
5528{
5529 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5530 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5531 RT_NOREF_PV(idxArg0);
5532
5533 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 1);
5534}
5535
5536
5537#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
5538 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1)
5539
5540/** Emits code for IEM_MC_CALL_CIMPL_2. */
5541DECL_INLINE_THROW(uint32_t)
5542iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5543 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
5544{
5545 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5546 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5547 RT_NOREF_PV(idxArg0);
5548
5549 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5550 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5551 RT_NOREF_PV(idxArg1);
5552
5553 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 2);
5554}
5555
5556
5557#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
5558 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2)
5559
5560/** Emits code for IEM_MC_CALL_CIMPL_3. */
5561DECL_INLINE_THROW(uint32_t)
5562iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5563 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
5564{
5565pReNative->pInstrBuf[off++] = 0xcc; /* Debug breakpoint (int3 on AMD64), presumably left in until this emitter path has been exercised. */
5566 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5567 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5568 RT_NOREF_PV(idxArg0);
5569
5570 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5571 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5572 RT_NOREF_PV(idxArg1);
5573
5574 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5575 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5576 RT_NOREF_PV(idxArg2);
5577
5578 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 3);
5579}
5580
5581
5582#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2, a3) \
5583 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
5584
5585/** Emits code for IEM_MC_CALL_CIMPL_4. */
5586DECL_INLINE_THROW(uint32_t)
5587iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5588 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
5589{
5590pReNative->pInstrBuf[off++] = 0xcc; /* Debug breakpoint (int3 on AMD64), presumably left in until this emitter path has been exercised. */
5591 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5592 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5593 RT_NOREF_PV(idxArg0);
5594
5595 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5596 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5597 RT_NOREF_PV(idxArg1);
5598
5599 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5600 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5601 RT_NOREF_PV(idxArg2);
5602
5603 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5604 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5605 RT_NOREF_PV(idxArg3);
5606
5607 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 4);
5608}
5609
5610
5611#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2, a3, a4) \
5612 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
5613
5614/** Emits code for IEM_MC_CALL_CIMPL_5. */
5615DECL_INLINE_THROW(uint32_t)
5616iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5617 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
5618{
5619pReNative->pInstrBuf[off++] = 0xcc; /* Debug breakpoint (int3 on AMD64), presumably left in until this emitter path has been exercised. */
5620 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5621 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5622 RT_NOREF_PV(idxArg0);
5623
5624 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5625 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5626 RT_NOREF_PV(idxArg1);
5627
5628 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5629 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5630 RT_NOREF_PV(idxArg2);
5631
5632 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5633 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5634 RT_NOREF_PV(idxArg3);
5635
5636 Assert(idxArg4 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg4)));
5637 Assert(pReNative->Core.aVars[idxArg4].uArgNo == 4 + IEM_CIMPL_HIDDEN_ARGS);
5638 RT_NOREF_PV(idxArg4);
5639
5640 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, pfnCImpl, 5);
5641}
5642
5643
5644
5645/*********************************************************************************************************************************
5646* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
5647*********************************************************************************************************************************/
5648
5649#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
5650 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
5651
5652/** Emits code for IEM_MC_FETCH_GREG_U16. */
5653DECL_INLINE_THROW(uint32_t)
5654iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
5655{
5656 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
5657 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
5658
5659 /*
5660 * We can either just load the low 16 bits of the GPR into a host register
5661 * for the variable, or we can do so via a shadow copy host register. The
5662 * latter will avoid having to reload it if it's being stored later, but
5663 * will waste a host register if it isn't touched again. Since we don't
5664 * know what's going to happen, we choose the latter for now.
5665 */
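    /* E.g. when fetching CX into a variable: guest RCX ends up (or already is)
       shadowed in a host register, and a 16-bit register-to-register copy moves its
       low word into the variable's own host register (judging by the
       iemNativeEmitLoadGprFromGpr16 helper used below). */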
5666 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5667 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5668 kIemNativeGstRegUse_ReadOnly);
5669
5670 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5671 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
5672 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
5673
5674 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
5675 return off;
5676}
5677
5678
5679
5680/*********************************************************************************************************************************
5681* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
5682*********************************************************************************************************************************/
5683
5684#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
5685 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
5686
5687/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
5688DECL_INLINE_THROW(uint32_t)
5689iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
5690{
5691 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5692 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + (iGRegEx & 15)),
5693 kIemNativeGstRegUse_ForUpdate);
5694#ifdef RT_ARCH_AMD64
5695 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5696
5697 /* To the lowest byte of the register: mov r8, imm8 */
5698 if (iGRegEx < 16)
5699 {
5700 if (idxGstTmpReg >= 8)
5701 pbCodeBuf[off++] = X86_OP_REX_B;
5702 else if (idxGstTmpReg >= 4)
5703 pbCodeBuf[off++] = X86_OP_REX;
5704 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5705 pbCodeBuf[off++] = u8Value;
5706 }
5707 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if the host register has an addressable high byte; otherwise rotate, store and rotate back. */
5708 else if (idxGstTmpReg < 4)
5709 {
5710 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
5711 pbCodeBuf[off++] = u8Value;
5712 }
5713 else
5714 {
5715 /* ror reg64, 8 */
5716 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5717 pbCodeBuf[off++] = 0xc1;
5718 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5719 pbCodeBuf[off++] = 8;
5720
5721 /* mov reg8, imm8 */
5722 if (idxGstTmpReg >= 8)
5723 pbCodeBuf[off++] = X86_OP_REX_B;
5724 else if (idxGstTmpReg >= 4)
5725 pbCodeBuf[off++] = X86_OP_REX;
5726 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5727 pbCodeBuf[off++] = u8Value;
5728
5729 /* rol reg64, 8 */
5730 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5731 pbCodeBuf[off++] = 0xc1;
5732 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5733 pbCodeBuf[off++] = 8;
5734 }
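    /* Illustrative example: storing a constant into guest CH while RCX is
       shadowed in, say, host rsi would emit roughly:
            ror rsi, 8      ; bits 15:8 -> bits 7:0
            mov sil, imm8   ; overwrite the low byte (REX needed for sil)
            rol rsi, 8      ; rotate the new value back into bits 15:8
     */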
5735
5736#elif defined(RT_ARCH_ARM64)
5737 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
5738 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5739 if (iGRegEx < 16)
5740 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
5741 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
5742 else
5743 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
5744 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
5745 iemNativeRegFreeTmp(pReNative, idxImmReg);
5746
5747#else
5748# error "Port me!"
5749#endif
5750
5751 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5752
5753 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
5754
5755 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5756 return off;
5757}
5758
5759
5760/*
5761 * General purpose register manipulation (add, sub).
5762 */
5763
5764#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5765 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5766
5767/** Emits code for IEM_MC_SUB_GREG_U16. */
5768DECL_INLINE_THROW(uint32_t)
5769iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5770{
5771 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5772 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5773 kIemNativeGstRegUse_ForUpdate);
5774
5775#ifdef RT_ARCH_AMD64
5776 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5777 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5778 if (idxGstTmpReg >= 8)
5779 pbCodeBuf[off++] = X86_OP_REX_B;
5780 if (uSubtrahend == 1)
5781 {
5782 pbCodeBuf[off++] = 0xff; /* dec */
5783 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5784 }
5785 else
5786 {
5787 pbCodeBuf[off++] = 0x81;
5788 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5789 pbCodeBuf[off++] = uSubtrahend;
5790 pbCodeBuf[off++] = 0;
5791 }
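    /* Illustrative encodings, assuming the guest register is shadowed in host rcx:
            dec cx          -> 66 FF C9
            sub cx, 0x2a    -> 66 81 E9 2A 00
     */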
5792
5793#else
5794 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5795 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5796
5797 /* sub tmp, gstgrp, uSubtrahend */
5798 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5799
5800 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5801 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5802
5803 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5804#endif
5805
5806 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5807
5808 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5809
5810 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5811 return off;
5812}
5813
5814
5815#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5816 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5817
5818#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5819 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5820
5821/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5822DECL_INLINE_THROW(uint32_t)
5823iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5824{
5825 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5826 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5827 kIemNativeGstRegUse_ForUpdate);
5828
5829#ifdef RT_ARCH_AMD64
5830 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5831 if (f64Bit)
5832 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5833 else if (idxGstTmpReg >= 8)
5834 pbCodeBuf[off++] = X86_OP_REX_B;
5835 if (uSubtrahend == 1)
5836 {
5837 /* dec */
5838 pbCodeBuf[off++] = 0xff;
5839 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5840 }
5841 else if (uSubtrahend < 128)
5842 {
5843 pbCodeBuf[off++] = 0x83; /* sub */
5844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5845 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5846 }
5847 else
5848 {
5849 pbCodeBuf[off++] = 0x81; /* sub */
5850 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5851 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5852 pbCodeBuf[off++] = 0;
5853 pbCodeBuf[off++] = 0;
5854 pbCodeBuf[off++] = 0;
5855 }
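    /* Illustrative encodings, assuming the guest register is shadowed in host rax:
            dec eax         -> FF C8
            dec rax         -> 48 FF C8
            sub eax, 0x7f   -> 83 E8 7F
            sub rax, 0xc8   -> 48 81 E8 C8 00 00 00
     */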
5856
5857#else
5858 /* sub gstgrp, gstgrp, uSubtrahend */
5859 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5860 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5861
5862#endif
5863
5864 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5865
5866 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5867
5868 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5869 return off;
5870}
5871
5872
5873
5874/*********************************************************************************************************************************
5875* Builtin functions *
5876*********************************************************************************************************************************/
5877
5878/**
5879 * Built-in function that calls a C-implementation function taking zero arguments.
5880 */
5881static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
5882{
5883 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
5884 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
5885 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
5886}
5887
5888
5889/**
5890 * Built-in function that checks for pending interrupts that can be delivered or
5891 * forced action flags.
5892 *
5893 * This triggers after the completion of an instruction, so EIP is already at
5894 * the next instruction. If an IRQ or important FF is pending, this will return
5895 * a non-zero status that stops TB execution.
5896 */
5897static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
5898{
5899 RT_NOREF(pCallEntry);
5900
5901 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
5902 and I'm too lazy to create a 'Fixed' version of that one. */
5903 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
5904 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
5905
5906 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
5907
5908 /* Again, we need to load the extended EFLAGS before we actually need them
5909 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
5910 loaded them inside the check, as the shadow state would not be correct
5911 when the code branches before the load. Ditto PC. */
5912 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5913 kIemNativeGstRegUse_ReadOnly);
5914
5915 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
5916
5917 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5918
5919 /*
5920 * Start by checking the local forced actions of the EMT we're on for IRQs
5921 * and other FFs that need servicing.
5922 */
5923 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
5924 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
5925 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
5926 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5927 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
5928 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
5929 | VMCPU_FF_TLB_FLUSH
5930 | VMCPU_FF_UNHALT ),
5931 true /*fSetFlags*/);
5932 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
5933 uint32_t const offFixupJumpToVmCheck1 = off;
5934 off = iemNativeEmitJzToFixed(pReNative, off, 0);
5935
5936 /* Some relevant FFs are set, but if it's only the APIC and/or PIC being set,
5937 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
5938 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5939 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
5940 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
5941 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
5942
5943 /* So, it's only interrupt related FFs and we need to see if IRQs are being
5944 suppressed by the CPU or not. */
5945 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
5946 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
5947 idxLabelReturnBreak);
5948
5949 /* We've got shadow flags set, so we must check that the PC they are valid
5950 for matches our current PC value. */
5951 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
5952 * a register. */
5953 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
5954 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
5955
5956 /*
5957 * Now check the force flags of the VM.
5958 */
5959 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
5960 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
5961 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
5962 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
5963 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
5964 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
5965
5966 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
5967
5968 /*
5969 * We're good, no IRQs or FFs pending.
5970 */
5971 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5972 iemNativeRegFreeTmp(pReNative, idxEflReg);
5973 iemNativeRegFreeTmp(pReNative, idxPcReg);
5974
5975 return off;
5976}
5977
5978
5979/**
5980 * Built-in function that checks if IEMCPU::fExec has the expected value.
5981 */
5982static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
5983{
5984 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
5985 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5986
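    /* Fetch the key part of IEMCPU::fExec and compare it against the value the TB
       was compiled for; on mismatch we exit via the ReturnBreak path, presumably so
       the caller can look up or compile a TB matching the current mode.
       (Summary of the emits below.) */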
5987 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5988 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
5989 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
5990 kIemNativeLabelType_ReturnBreak);
5991 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5992 return off;
5993}
5994
5995
5996
5997/*********************************************************************************************************************************
5998* The native code generator functions for each MC block. *
5999*********************************************************************************************************************************/
6000
6001
6002/*
6003 * Include g_apfnIemNativeRecompileFunctions and associated functions.
6004 *
6005 * This should probably live in its own file later, but let's see what the
6006 * compile times turn out to be first.
6007 */
6008#include "IEMNativeFunctions.cpp.h"
6009
6010
6011
6012/*********************************************************************************************************************************
6013* Recompiler Core. *
6014*********************************************************************************************************************************/
6015
6016
6017/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
6018static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
6019{
6020 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
6021 pDis->cbCachedInstr += cbMaxRead;
6022 RT_NOREF(cbMinRead);
6023 return VERR_NO_DATA;
6024}
6025
6026
6027/**
6028 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
6029 * @returns pszBuf.
6030 * @param fFlags The flags.
6031 * @param pszBuf The output buffer.
6032 * @param cbBuf The output buffer size. At least 32 bytes.
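 * @note Example output (illustrative): "32BIT_FLAT CPL3 TYPE_NATIVE".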
6033 */
6034DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
6035{
6036 Assert(cbBuf >= 32);
6037 static RTSTRTUPLE const s_aModes[] =
6038 {
6039 /* [00] = */ { RT_STR_TUPLE("16BIT") },
6040 /* [01] = */ { RT_STR_TUPLE("32BIT") },
6041 /* [02] = */ { RT_STR_TUPLE("!2!") },
6042 /* [03] = */ { RT_STR_TUPLE("!3!") },
6043 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
6044 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
6045 /* [06] = */ { RT_STR_TUPLE("!6!") },
6046 /* [07] = */ { RT_STR_TUPLE("!7!") },
6047 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
6048 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
6049 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
6050 /* [0b] = */ { RT_STR_TUPLE("!b!") },
6051 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
6052 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
6053 /* [0e] = */ { RT_STR_TUPLE("!e!") },
6054 /* [0f] = */ { RT_STR_TUPLE("!f!") },
6055 /* [10] = */ { RT_STR_TUPLE("!10!") },
6056 /* [11] = */ { RT_STR_TUPLE("!11!") },
6057 /* [12] = */ { RT_STR_TUPLE("!12!") },
6058 /* [13] = */ { RT_STR_TUPLE("!13!") },
6059 /* [14] = */ { RT_STR_TUPLE("!14!") },
6060 /* [15] = */ { RT_STR_TUPLE("!15!") },
6061 /* [16] = */ { RT_STR_TUPLE("!16!") },
6062 /* [17] = */ { RT_STR_TUPLE("!17!") },
6063 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
6064 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
6065 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
6066 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
6067 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
6068 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
6069 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
6070 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
6071 };
6072 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
6073 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
6074 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
6075
6076 pszBuf[off++] = ' ';
6077 pszBuf[off++] = 'C';
6078 pszBuf[off++] = 'P';
6079 pszBuf[off++] = 'L';
6080 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
6081 Assert(off < 32);
6082
6083 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
6084
6085 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
6086 {
6087 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
6088 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
6089 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
6090 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
6091 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
6092 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
6093 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
6094 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
6095 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
6096 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
6097 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
6098 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
6099 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
6100 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
6101 };
6102 if (fFlags)
6103 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
6104 if (s_aFlags[i].fFlag & fFlags)
6105 {
6106 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
6107 pszBuf[off++] = ' ';
6108 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
6109 off += s_aFlags[i].cchName;
6110 fFlags &= ~s_aFlags[i].fFlag;
6111 if (!fFlags)
6112 break;
6113 }
6114 pszBuf[off] = '\0';
6115
6116 return pszBuf;
6117}
6118
6119
6120DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
6121{
6122 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
6123
6124 char szDisBuf[512];
6125 DISSTATE Dis;
6126 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
6127 uint32_t const cNative = pTb->Native.cInstructions;
6128 uint32_t offNative = 0;
6129#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6130 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
6131#endif
6132 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6133 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6134 : DISCPUMODE_64BIT;
6135#ifdef RT_ARCH_AMD64
6136 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
6137#elif defined(RT_ARCH_ARM64)
6138 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
6139#else
6140# error "Port me"
6141#endif
6142
6143 /*
6144 * Print TB info.
6145 */
6146 pHlp->pfnPrintf(pHlp,
6147 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
6148 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
6149 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
6150 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
6151#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6152 if (pDbgInfo && pDbgInfo->cEntries > 1)
6153 {
6154 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
6155
6156 /*
6157 * This disassembly is driven by the debug info which follows the native
6158 * code and indicates where the next guest instruction starts, where labels
6159 * are, and such things.
6160 */
6161 uint32_t idxThreadedCall = 0;
6162 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
6163 uint8_t idxRange = UINT8_MAX;
6164 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
6165 uint32_t offRange = 0;
6166 uint32_t offOpcodes = 0;
6167 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
6168 uint32_t const cDbgEntries = pDbgInfo->cEntries;
6169 uint32_t iDbgEntry = 1;
6170 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
6171
6172 while (offNative < cNative)
6173 {
6174 /* If we're at or have passed the point where the next chunk of debug
6175 info starts, process it. */
6176 if (offDbgNativeNext <= offNative)
6177 {
6178 offDbgNativeNext = UINT32_MAX;
6179 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
6180 {
6181 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
6182 {
6183 case kIemTbDbgEntryType_GuestInstruction:
6184 {
6185 /* Did the exec flag change? */
6186 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
6187 {
6188 pHlp->pfnPrintf(pHlp,
6189 " fExec change %#08x -> %#08x %s\n",
6190 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6191 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6192 szDisBuf, sizeof(szDisBuf)));
6193 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
6194 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6195 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6196 : DISCPUMODE_64BIT;
6197 }
6198
6199 /* New opcode range? We need to fend off a spurious debug info entry here for cases
6200 where the compilation was aborted before the opcode was recorded and the actual
6201 instruction was translated to a threaded call. This may happen when we run out
6202 of ranges, or when some complicated interrupts/FFs are found to be pending or
6203 similar. So, we just deal with it here rather than in the compiler code as it
6204 is a lot simpler to do it up here. */
6205 if ( idxRange == UINT8_MAX
6206 || idxRange >= cRanges
6207 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
6208 {
6209 idxRange += 1;
6210 if (idxRange < cRanges)
6211 offRange = 0;
6212 else
6213 continue;
6214 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
6215 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
6216 + (pTb->aRanges[idxRange].idxPhysPage == 0
6217 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6218 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
6219 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6220 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
6221 pTb->aRanges[idxRange].idxPhysPage);
6222 }
6223
6224 /* Disassemble the instruction. */
6225 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
6226 uint32_t cbInstr = 1;
6227 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6228 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
6229 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6230 if (RT_SUCCESS(rc))
6231 {
6232 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6233 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6234 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6235 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6236
6237 static unsigned const s_offMarker = 55;
6238 static char const s_szMarker[] = " ; <--- guest";
6239 if (cch < s_offMarker)
6240 {
6241 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
6242 cch = s_offMarker;
6243 }
6244 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
6245 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
6246
6247 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
6248 }
6249 else
6250 {
6251 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
6252 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
6253 cbInstr = 1;
6254 }
6255 GCPhysPc += cbInstr;
6256 offOpcodes += cbInstr;
6257 offRange += cbInstr;
6258 continue;
6259 }
6260
6261 case kIemTbDbgEntryType_ThreadedCall:
6262 pHlp->pfnPrintf(pHlp,
6263 " Call #%u to %s (%u args)%s\n",
6264 idxThreadedCall,
6265 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6266 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6267 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
6268 idxThreadedCall++;
6269 continue;
6270
6271 case kIemTbDbgEntryType_GuestRegShadowing:
6272 {
6273 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
6274 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
6275 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
6276 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
6277 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6278 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
6279 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
6280 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
6281 else
6282 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
6283 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
6284 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6285 continue;
6286 }
6287
6288 case kIemTbDbgEntryType_Label:
6289 {
6290 const char *pszName = "what_the_fudge";
6291 const char *pszComment = "";
6292 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
6293 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
6294 {
6295 case kIemNativeLabelType_Return:
6296 pszName = "Return";
6297 break;
6298 case kIemNativeLabelType_ReturnBreak:
6299 pszName = "ReturnBreak";
6300 break;
6301 case kIemNativeLabelType_ReturnWithFlags:
6302 pszName = "ReturnWithFlags";
6303 break;
6304 case kIemNativeLabelType_NonZeroRetOrPassUp:
6305 pszName = "NonZeroRetOrPassUp";
6306 break;
6307 case kIemNativeLabelType_RaiseGp0:
6308 pszName = "RaiseGp0";
6309 break;
6310 case kIemNativeLabelType_If:
6311 pszName = "If";
6312 fNumbered = true;
6313 break;
6314 case kIemNativeLabelType_Else:
6315 pszName = "Else";
6316 fNumbered = true;
6317 pszComment = " ; regs state restored pre-if-block";
6318 break;
6319 case kIemNativeLabelType_Endif:
6320 pszName = "Endif";
6321 fNumbered = true;
6322 break;
6323 case kIemNativeLabelType_CheckIrq:
6324 pszName = "CheckIrq_CheckVM";
6325 fNumbered = true;
6326 break;
6327 case kIemNativeLabelType_Invalid:
6328 case kIemNativeLabelType_End:
6329 break;
6330 }
6331 if (fNumbered)
6332 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
6333 else
6334 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
6335 continue;
6336 }
6337
6338 case kIemTbDbgEntryType_NativeOffset:
6339 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
6340 Assert(offDbgNativeNext > offNative);
6341 break;
6342
6343 default:
6344 AssertFailed();
6345 }
6346 iDbgEntry++;
6347 break;
6348 }
6349 }
6350
6351 /*
6352 * Disassemble the next native instruction.
6353 */
6354 uint32_t cbInstr = sizeof(paNative[0]);
6355 int const rc = DISInstr(&paNative[offNative], enmHstCpuMode, &Dis, &cbInstr);
6356 if (RT_SUCCESS(rc))
6357 {
6358# if defined(RT_ARCH_AMD64)
6359 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6360 {
6361 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6362 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6363 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
6364 &paNative[offNative], uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6365 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6366 uInfo & 0x8000 ? " - recompiled" : "");
6367 else
6368 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", &paNative[offNative], uInfo, uInfo);
6369 }
6370 else
6371# endif
6372 {
6373 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6374 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6375 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6376 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6377 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paNative[offNative], szDisBuf);
6378 }
6379 }
6380 else
6381 {
6382# if defined(RT_ARCH_AMD64)
6383 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6384 &paNative[offNative], RT_MIN(cNative - offNative, 16), &paNative[offNative], rc);
6385# elif defined(RT_ARCH_ARM64)
6386 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
6387 &paNative[offNative], paNative[offNative], rc);
6388# else
6389# error "Port me"
6390# endif
6391 cbInstr = sizeof(paNative[0]);
6392 }
6393 offNative += cbInstr / sizeof(paNative[0]);
6394 }
6395 }
6396 else
6397#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
6398 {
6399 /*
6400 * No debug info, just disassemble the x86 code and then the native code.
6401 */
6402 /* The guest code. */
6403 for (unsigned i = 0; i < pTb->cRanges; i++)
6404 {
6405 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
6406 + (pTb->aRanges[i].idxPhysPage == 0
6407 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6408 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
6409 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6410 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
6411 unsigned off = pTb->aRanges[i].offOpcodes;
6412 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
6413 while (off < cbOpcodes)
6414 {
6415 uint32_t cbInstr = 1;
6416 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6417 &pTb->pabOpcodes[off], cbOpcodes - off,
6418 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6419 if (RT_SUCCESS(rc))
6420 {
6421 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6422 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6423 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6424 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6425 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
6426 GCPhysPc += cbInstr;
6427 off += cbInstr;
6428 }
6429 else
6430 {
6431 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
6432 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
6433 break;
6434 }
6435 }
6436 }
6437
6438 /* The native code: */
6439 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
6440 while (offNative < cNative)
6441 {
6442 uint32_t cbInstr = sizeof(paNative[0]);
6443 int const rc = DISInstr(&paNative[offNative], enmHstCpuMode, &Dis, &cbInstr);
6444 if (RT_SUCCESS(rc))
6445 {
6446# if defined(RT_ARCH_AMD64)
6447 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6448 {
6449 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6450 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6451 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
6452 &paNative[offNative], uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6453 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6454 uInfo & 0x8000 ? " - recompiled" : "");
6455 else
6456 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", &paNative[offNative], uInfo, uInfo);
6457 }
6458 else
6459# endif
6460 {
6461 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6462 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6463 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6464 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6465 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paNative[offNative], szDisBuf);
6466 }
6467 }
6468 else
6469 {
6470# if defined(RT_ARCH_AMD64)
6471 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6472 &paNative[offNative], RT_MIN(cNative - offNative, 16), &paNative[offNative], rc);
6473# elif defined(RT_ARCH_ARM64)
6474 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
6475 &paNative[offNative], paNative[offNative], rc);
6476# else
6477# error "Port me"
6478# endif
6479 cbInstr = sizeof(paNative[0]);
6480 }
6481 offNative += cbInstr / sizeof(paNative[0]);
6482 }
6483 }
6484}
6485
6486
6487/**
6488 * Recompiles the given threaded TB into a native one.
6489 *
6490 * In case of failure the translation block will be returned as-is.
6491 *
6492 * @returns pTb.
6493 * @param pVCpu The cross context virtual CPU structure of the calling
6494 * thread.
6495 * @param pTb The threaded translation to recompile to native.
6496 */
6497DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
6498{
6499 /*
6500 * The first time thru, we allocate the recompiler state; the other times
6501 * we just need to reset it before using it again.
6502 */
6503 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
6504 if (RT_LIKELY(pReNative))
6505 iemNativeReInit(pReNative, pTb);
6506 else
6507 {
6508 pReNative = iemNativeInit(pVCpu, pTb);
6509 AssertReturn(pReNative, pTb);
6510 }
6511
6512 /*
6513 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
6514 * for aborting if an error happens.
6515 */
6516 uint32_t cCallsLeft = pTb->Thrd.cCalls;
6517#ifdef LOG_ENABLED
6518 uint32_t const cCallsOrg = cCallsLeft;
6519#endif
6520 uint32_t off = 0;
6521 int rc = VINF_SUCCESS;
6522 IEMNATIVE_TRY_SETJMP(pReNative, rc)
6523 {
6524 /*
6525 * Emit prolog code (fixed).
6526 */
6527 off = iemNativeEmitProlog(pReNative, off);
6528
6529 /*
6530 * Convert the calls to native code.
6531 */
6532#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6533 int32_t iGstInstr = -1;
6534 uint32_t fExec = pTb->fFlags;
6535#endif
6536 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
6537 while (cCallsLeft-- > 0)
6538 {
6539 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
6540
6541 /*
6542 * Debug info and assembly markup.
6543 */
6544#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6545 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
6546 fExec = pCallEntry->auParams[0];
6547 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6548 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
6549 {
6550 if (iGstInstr < (int32_t)pTb->cInstructions)
6551 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
6552 else
6553 Assert(iGstInstr == pTb->cInstructions);
6554 iGstInstr = pCallEntry->idxInstr;
6555 }
6556 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
6557#elif defined(VBOX_STRICT)
6558 off = iemNativeEmitMarker(pReNative, off,
6559 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
6560 pCallEntry->enmFunction));
6561#endif
6562
6563 /*
6564 * Actual work.
6565 */
6566 if (pfnRecom) /** @todo stats on this. */
6567 {
6568 //STAM_COUNTER_INC()
6569 off = pfnRecom(pReNative, off, pCallEntry);
6570 }
6571 else
6572 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
6573 Assert(off <= pReNative->cInstrBufAlloc);
6574 Assert(pReNative->cCondDepth == 0);
6575
6576 /*
6577 * Advance.
6578 */
6579 pCallEntry++;
6580 }
6581
6582 /*
6583 * Emit the epilog code.
6584 */
6585 uint32_t idxReturnLabel;
6586 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
6587
6588 /*
6589 * Generate special jump labels.
6590 */
6591 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
6592 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
6593 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
6594 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
6595 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
6596 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
6597 }
6598 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
6599 {
6600 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
6601 return pTb;
6602 }
6603 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
6604 Assert(off <= pReNative->cInstrBufAlloc);
6605
6606 /*
6607 * Make sure all labels have been defined.
6608 */
6609 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
6610#ifdef VBOX_STRICT
6611 uint32_t const cLabels = pReNative->cLabels;
6612 for (uint32_t i = 0; i < cLabels; i++)
6613 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
6614#endif
6615
6616 /*
6617 * Allocate executable memory, copy over the code we've generated.
6618 */
6619 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
6620 if (pTbAllocator->pDelayedFreeHead)
6621 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
6622
6623 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
6624 AssertReturn(paFinalInstrBuf, pTb);
6625 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
6626
6627 /*
6628 * Apply fixups.
6629 */
6630 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
6631 uint32_t const cFixups = pReNative->cFixups;
6632 for (uint32_t i = 0; i < cFixups; i++)
6633 {
6634 Assert(paFixups[i].off < off);
6635 Assert(paFixups[i].idxLabel < cLabels);
6636 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
6637 switch (paFixups[i].enmType)
6638 {
6639#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6640 case kIemNativeFixupType_Rel32:
6641 Assert(paFixups[i].off + 4 <= off);
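                /* The stored value is the signed byte distance from the fixup field to the
                   label; the addend recorded with the fixup (typically -4, the size of the
                   rel32 field) makes the branch relative to the end of the instruction.
                   (Note: the -4 convention is inferred from how rel32 branches are emitted.) */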
6642 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6643 continue;
6644
6645#elif defined(RT_ARCH_ARM64)
6646 case kIemNativeFixupType_RelImm19At5:
6647 {
6648 Assert(paFixups[i].off < off);
6649 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
6650 Assert(offDisp >= -262144 && offDisp < 262144);
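                /* offDisp is in units of 32-bit instructions; a signed 19-bit immediate
                   covers +/-262144 instructions, i.e. a +/-1MB branch range. */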
6651 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (offDisp << 5);
6652 continue;
6653 }
6654#endif
6655 case kIemNativeFixupType_Invalid:
6656 case kIemNativeFixupType_End:
6657 break;
6658 }
6659 AssertFailed();
6660 }
6661
6662 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
6663
6664 /*
6665 * Convert the translation block.
6666 */
6667 //RT_BREAKPOINT();
6668 RTMemFree(pTb->Thrd.paCalls);
6669 pTb->Native.paInstructions = paFinalInstrBuf;
6670 pTb->Native.cInstructions = off;
6671 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
6672#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6673 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
6674 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
6675#endif
6676
6677 Assert(pTbAllocator->cThreadedTbs > 0);
6678 pTbAllocator->cThreadedTbs -= 1;
6679 pTbAllocator->cNativeTbs += 1;
6680 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
6681
6682#ifdef LOG_ENABLED
6683 /*
6684 * Disassemble to the log if enabled.
6685 */
6686 if (LogIs3Enabled())
6687 {
6688 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
6689 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
6690 }
6691#endif
6692
6693 return pTb;
6694}
6695