VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 101706

Last change on this file since 101706 was 101706, checked in by vboxsync, 15 months ago

VMM/IEM: Native translation for IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS and all it's relative jump friends. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 101706 2023-11-02 00:29:00Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94
95#include "IEMInline.h"
96#include "IEMThreadedFunctions.h"
97#include "IEMN8veRecompiler.h"
98#include "IEMNativeFunctions.h"
99
100
101/*
102 * Narrow down configs here to avoid wasting time on unused configs.
103 * Note! Same checks in IEMAllThrdRecompiler.cpp.
104 */
105
106#ifndef IEM_WITH_CODE_TLB
107# error The code TLB must be enabled for the recompiler.
108#endif
109
110#ifndef IEM_WITH_DATA_TLB
111# error The data TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_SETJMP
115# error The setjmp approach must be enabled for the recompiler.
116#endif
117
118
119/*********************************************************************************************************************************
120* Defined Constants And Macros *
121*********************************************************************************************************************************/
122/** Always count instructions for now. */
123#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
124
125
126/*********************************************************************************************************************************
127* Internal Functions *
128*********************************************************************************************************************************/
129#ifdef VBOX_STRICT
130static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
131 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
132#endif
133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
134static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
135static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
136#endif
137
138
139/*********************************************************************************************************************************
140* Executable Memory Allocator *
141*********************************************************************************************************************************/
142/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
143 * Use an alternative chunk sub-allocator that does not store internal data
144 * in the chunk.
145 *
146 * Using the RTHeapSimple is not practical on newer darwin systems where
147 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
148 * memory. We would have to change the protection of the whole chunk for
149 * every call to RTHeapSimple, which would be rather expensive.
150 *
151 * This alternative implementation lets us restrict page protection modifications
152 * to the pages backing the executable memory we just allocated.
153 */
154#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155/** The chunk sub-allocation unit size in bytes. */
156#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
157/** The chunk sub-allocation unit size as a shift factor. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
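/* Illustrative sketch: how a request size is rounded up to whole sub-allocation
   units with the two constants above; e.g. a 200 byte request becomes 2 units
   (256 bytes) and a 128 byte request exactly 1 unit.  The helper name is made
   up; the same expression is used by iemExecMemAllocatorAllocInChunk below. */
#if 0
static uint32_t iemExecMemExampleCbToUnits(uint32_t cbReq)
{
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif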
159
160#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
161# ifdef IEMNATIVE_USE_GDB_JIT
162# define IEMNATIVE_USE_GDB_JIT_ET_DYN
163
164/** GDB JIT: Code entry. */
165typedef struct GDBJITCODEENTRY
166{
167 struct GDBJITCODEENTRY *pNext;
168 struct GDBJITCODEENTRY *pPrev;
169 uint8_t *pbSymFile;
170 uint64_t cbSymFile;
171} GDBJITCODEENTRY;
172
173/** GDB JIT: Actions. */
174typedef enum GDBJITACTIONS : uint32_t
175{
176 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
177} GDBJITACTIONS;
178
179/** GDB JIT: Descriptor. */
180typedef struct GDBJITDESCRIPTOR
181{
182 uint32_t uVersion;
183 GDBJITACTIONS enmAction;
184 GDBJITCODEENTRY *pRelevant;
185 GDBJITCODEENTRY *pHead;
186 /** Our addition: */
187 GDBJITCODEENTRY *pTail;
188} GDBJITDESCRIPTOR;
189
190/** GDB JIT: Our simple symbol file data. */
191typedef struct GDBJITSYMFILE
192{
193 Elf64_Ehdr EHdr;
194# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
195 Elf64_Shdr aShdrs[5];
196# else
197 Elf64_Shdr aShdrs[7];
198 Elf64_Phdr aPhdrs[2];
199# endif
200 /** The dwarf ehframe data for the chunk. */
201 uint8_t abEhFrame[512];
202 char szzStrTab[128];
203 Elf64_Sym aSymbols[3];
204# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
205 Elf64_Sym aDynSyms[2];
206 Elf64_Dyn aDyn[6];
207# endif
208} GDBJITSYMFILE;
209
210extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
211extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
212
213/** Init once for g_IemNativeGdbJitLock. */
214static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
215/** Critical section protecting the GDB JIT descriptor list (__jit_debug_descriptor). */
216static RTCRITSECT g_IemNativeGdbJitLock;
217
218/** GDB reads the info here. */
219GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
220
221/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
222DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
223{
224 ASMNopPause();
225}
226
227/** @callback_method_impl{FNRTONCE} */
228static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
229{
230 RT_NOREF(pvUser);
231 return RTCritSectInit(&g_IemNativeGdbJitLock);
232}
233
234
235# endif /* IEMNATIVE_USE_GDB_JIT */
236
237/**
238 * Per-chunk unwind info for non-windows hosts.
239 */
240typedef struct IEMEXECMEMCHUNKEHFRAME
241{
242# ifdef IEMNATIVE_USE_LIBUNWIND
243 /** The offset of the FDA into abEhFrame. */
244 uintptr_t offFda;
245# else
246 /** 'struct object' storage area. */
247 uint8_t abObject[1024];
248# endif
249# ifdef IEMNATIVE_USE_GDB_JIT
250# if 0
251 /** The GDB JIT 'symbol file' data. */
252 GDBJITSYMFILE GdbJitSymFile;
253# endif
254 /** The GDB JIT list entry. */
255 GDBJITCODEENTRY GdbJitEntry;
256# endif
257 /** The dwarf ehframe data for the chunk. */
258 uint8_t abEhFrame[512];
259} IEMEXECMEMCHUNKEHFRAME;
260/** Pointer to per-chunk unwind info for non-windows hosts. */
261typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
262#endif
263
264
265/**
266 * A chunk of executable memory.
267 */
268typedef struct IEMEXECMEMCHUNK
269{
270#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
271 /** Number of free items in this chunk. */
272 uint32_t cFreeUnits;
273 /** Hint where to start searching for free space in the allocation bitmap. */
274 uint32_t idxFreeHint;
275#else
276 /** The heap handle. */
277 RTHEAPSIMPLE hHeap;
278#endif
279 /** Pointer to the chunk. */
280 void *pvChunk;
281#ifdef IN_RING3
282 /**
283 * Pointer to the unwind information.
284 *
285 * This is used during C++ throw and longjmp (windows and probably most other
286 * platforms). Some debuggers (windbg) make use of it as well.
287 *
288 * Windows: This is allocated from hHeap on windows because (at least for
289 * AMD64) the UNWIND_INFO structure address in the
290 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
291 *
292 * Others: Allocated from the regular heap to avoid unnecessary executable data
293 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
294 void *pvUnwindInfo;
295#elif defined(IN_RING0)
296 /** Allocation handle. */
297 RTR0MEMOBJ hMemObj;
298#endif
299} IEMEXECMEMCHUNK;
300/** Pointer to a memory chunk. */
301typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
302
303
304/**
305 * Executable memory allocator for the native recompiler.
306 */
307typedef struct IEMEXECMEMALLOCATOR
308{
309 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
310 uint32_t uMagic;
311
312 /** The chunk size. */
313 uint32_t cbChunk;
314 /** The maximum number of chunks. */
315 uint32_t cMaxChunks;
316 /** The current number of chunks. */
317 uint32_t cChunks;
318 /** Hint where to start looking for available memory. */
319 uint32_t idxChunkHint;
320 /** Statistics: Current number of allocations. */
321 uint32_t cAllocations;
322
323 /** The total amount of memory available. */
324 uint64_t cbTotal;
325 /** Total amount of free memory. */
326 uint64_t cbFree;
327 /** Total amount of memory allocated. */
328 uint64_t cbAllocated;
329
330#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
331 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
332 *
333 * Since the chunk size is a power of two and the minimum chunk size is a lot
334 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
335 * require a whole number of uint64_t elements in the allocation bitmap. So,
336 * for the sake of simplicity (and laziness), they are allocated as one
337 * continuous chunk. */
338 uint64_t *pbmAlloc;
339 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
340 uint32_t cUnitsPerChunk;
341 /** Number of bitmap elements per chunk (for quickly locating the bitmap
342 * portion corresponding to a chunk). */
343 uint32_t cBitmapElementsPerChunk;
344#else
345 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
346 * @{ */
347 /** The size of the heap internal block header. This is used to adjust the
348 * requested memory size to make sure there is exactly enough room for a header at
349 * the end of the blocks we allocate before the next 64 byte alignment line. */
350 uint32_t cbHeapBlockHdr;
351 /** The size of the initial heap allocation required to make sure the first
352 * allocation is correctly aligned. */
353 uint32_t cbHeapAlignTweak;
354 /** The alignment tweak allocation address. */
355 void *pvAlignTweak;
356 /** @} */
357#endif
358
359#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
360 /** Pointer to the array of unwind info running parallel to aChunks (same
361 * allocation as this structure, located after the bitmaps).
362 * (For Windows, the structures must reside in 32-bit RVA distance to the
363 * actual chunk, so they are allocated off the chunk.) */
364 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
365#endif
366
367 /** The allocation chunks. */
368 RT_FLEXIBLE_ARRAY_EXTENSION
369 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
370} IEMEXECMEMALLOCATOR;
371/** Pointer to an executable memory allocator. */
372typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
373
374/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
375#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
376
377
378static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
379
380
381/**
382 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
383 * the heap statistics.
384 */
385static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
386 uint32_t cbReq, uint32_t idxChunk)
387{
388 pExecMemAllocator->cAllocations += 1;
389 pExecMemAllocator->cbAllocated += cbReq;
390#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
391 pExecMemAllocator->cbFree -= cbReq;
392#else
393 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
394#endif
395 pExecMemAllocator->idxChunkHint = idxChunk;
396
397#ifdef RT_OS_DARWIN
398 /*
399 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
400 * on darwin. So, we mark the pages returned as read+write after alloc and
401 * expect the caller to call iemExecMemAllocatorReadyForUse when done
402 * writing to the allocation.
403 *
404 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
405 * for details.
406 */
407 /** @todo detect if this is necessary... it wasn't required on 10.15 or
408 * whatever older version it was. */
409 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
410 AssertRC(rc);
411#endif
412
413 return pvRet;
414}
415
416
417#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
418static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
419 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
420{
421 /*
422 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
423 */
424 Assert(!(cToScan & 63));
425 Assert(!(idxFirst & 63));
426 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
427 pbmAlloc += idxFirst / 64;
428
429 /*
430 * Scan the bitmap for a run of cReqUnits consecutive clear bits
431 */
432 /** @todo This can probably be done more efficiently for non-x86 systems. */
433 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
434 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
435 {
436 uint32_t idxAddBit = 1;
437 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
438 idxAddBit++;
439 if (idxAddBit >= cReqUnits)
440 {
441 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
442
443 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
444 pChunk->cFreeUnits -= cReqUnits;
445 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
446
447 void * const pvRet = (uint8_t *)pChunk->pvChunk
448 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
449
450 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
451 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
452 }
453
454 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
455 }
456 return NULL;
457}
458#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
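/* Worked example: with cToScan = 64, cReqUnits = 2 and a bitmap whose low byte
   is 0x17 (bits 0, 1, 2 and 4 set), ASMBitFirstClear returns 3; the inner loop
   then finds bit 4 already set after a single clear bit, so the scan resumes
   via ASMBitNextClear from bit 3 and succeeds with the run starting at bit 5,
   i.e. the returned block begins 5 * 128 bytes into the scanned area of the
   chunk. */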
459
460
461static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
462{
463#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
464 /*
465 * Figure out how much to allocate.
466 */
467 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
468 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
469 {
470 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
471 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
472 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
473 {
474 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
475 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
476 if (pvRet)
477 return pvRet;
478 }
479 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
480 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
481 cReqUnits, idxChunk);
482 }
483#else
484 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
485 if (pvRet)
486 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
487#endif
488 return NULL;
489
490}
491
492
493/**
494 * Allocates @a cbReq bytes of executable memory.
495 *
496 * @returns Pointer to the memory, NULL if out of memory or other problem
497 * encountered.
498 * @param pVCpu The cross context virtual CPU structure of the calling
499 * thread.
500 * @param cbReq How many bytes are required.
501 */
502static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
503{
504 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
505 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
506 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
507
508 /*
509 * Adjust the request size so it'll fit the allocator alignment/whatnot.
510 *
511 * For the RTHeapSimple allocator this means to follow the logic described
512 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
513 * existing chunks if we think we've got sufficient free memory around.
514 *
515 * While for the alternative one we just align it up to a whole unit size.
516 */
517#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
518 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
519#else
520 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
521#endif
522 if (cbReq <= pExecMemAllocator->cbFree)
523 {
524 uint32_t const cChunks = pExecMemAllocator->cChunks;
525 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
526 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
527 {
528 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
529 if (pvRet)
530 return pvRet;
531 }
532 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
533 {
534 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
535 if (pvRet)
536 return pvRet;
537 }
538 }
539
540 /*
541 * Can we grow it with another chunk?
542 */
543 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
544 {
545 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
546 AssertLogRelRCReturn(rc, NULL);
547
548 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 AssertFailed();
553 }
554
555 /* What now? Prune native translation blocks from the cache? */
556 AssertFailed();
557 return NULL;
558}
559
560
561/** This is a hook that we may need later for changing memory protection back
562 * to readonly+exec */
563static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
564{
565#ifdef RT_OS_DARWIN
566 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
567 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
568 AssertRC(rc); RT_NOREF(pVCpu);
569
570 /*
571 * Flush the instruction cache:
572 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
573 */
574 /* sys_dcache_flush(pv, cb); - not necessary */
575 sys_icache_invalidate(pv, cb);
576#else
577 RT_NOREF(pVCpu, pv, cb);
578#endif
579}
580
581
582/**
583 * Frees executable memory.
584 */
585void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
586{
587 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
588 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
589 Assert(pv);
590#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
591 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
592#else
593 Assert(!((uintptr_t)pv & 63));
594#endif
595
596 /* Align the size as we did when allocating the block. */
597#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
598 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
599#else
600 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
601#endif
602
603 /* Free it / assert sanity. */
604#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
605 uint32_t const cChunks = pExecMemAllocator->cChunks;
606 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
607 bool fFound = false;
608 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
609 {
610 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
611 fFound = offChunk < cbChunk;
612 if (fFound)
613 {
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
616 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
617
618 /* Check that it's valid and free it. */
619 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
620 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
621 for (uint32_t i = 1; i < cReqUnits; i++)
622 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
623 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
624
625 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
626 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
627
628 /* Update the stats. */
629 pExecMemAllocator->cbAllocated -= cb;
630 pExecMemAllocator->cbFree += cb;
631 pExecMemAllocator->cAllocations -= 1;
632 return;
633#else
634 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
635 break;
636#endif
637 }
638 }
639# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
640 AssertFailed();
641# else
642 Assert(fFound);
643# endif
644#endif
645
646#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
647 /* Update stats while cb is freshly calculated. */
648 pExecMemAllocator->cbAllocated -= cb;
649 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
650 pExecMemAllocator->cAllocations -= 1;
651
652 /* Free it. */
653 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
654#endif
655}
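/* Illustrative sketch of the allocation protocol implied by the functions
   above: emit code while the block is still writable, flip it to read+exec via
   iemExecMemAllocatorReadyForUse (which also flushes the instruction cache on
   darwin), and eventually return it.  The helper name, buffer and size are
   made up for illustration. */
#if 0
static void iemExecMemExampleUsage(PVMCPUCC pVCpu, const uint8_t *pbCode, size_t cbCode)
{
    void * const pv = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbCode);
    if (pv)
    {
        memcpy(pv, pbCode, cbCode);                         /* the block is still writable here */
        iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);  /* now read+exec; safe to run */
        /* ... execute the recompiled code ... */
        iemExecMemAllocatorFree(pVCpu, pv, cbCode);         /* give it back when no longer needed */
    }
}
#endif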
656
657
658
659#ifdef IN_RING3
660# ifdef RT_OS_WINDOWS
661
662/**
663 * Initializes the unwind info structures for windows hosts.
664 */
665static int
666iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
667 void *pvChunk, uint32_t idxChunk)
668{
669 RT_NOREF(pVCpu);
670
671 /*
672 * The AMD64 unwind opcodes.
673 *
674 * This is a program that starts with RSP after a RET instruction that
675 * ends up in recompiled code, and the operations we describe here will
676 * restore all non-volatile registers and bring RSP back to where our
677 * RET address is. This means it's reverse order from what happens in
678 * the prologue.
679 *
680 * Note! Using a frame register approach here, both because we have one
681 * and, mainly, because the UWOP_ALLOC_LARGE argument values
682 * would be a pain to write initializers for. On the positive
683 * side, we're impervious to changes in the stack variable
684 * area and can deal with dynamic stack allocations if necessary.
685 */
686 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
687 {
688 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
689 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
690 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
691 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
692 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
693 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
694 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
695 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
696 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
697 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
698 };
699 union
700 {
701 IMAGE_UNWIND_INFO Info;
702 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
703 } s_UnwindInfo =
704 {
705 {
706 /* .Version = */ 1,
707 /* .Flags = */ 0,
708 /* .SizeOfProlog = */ 16, /* whatever */
709 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
710 /* .FrameRegister = */ X86_GREG_xBP,
711 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
712 }
713 };
714 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
715 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
716
717 /*
718 * Calc how much space we need and allocate it off the exec heap.
719 */
720 unsigned const cFunctionEntries = 1;
721 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
722 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
723# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
724 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
725 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
726 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
727# else
728 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
729 - pExecMemAllocator->cbHeapBlockHdr;
730 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
731 32 /*cbAlignment*/);
732# endif
733 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
734 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
735
736 /*
737 * Initialize the structures.
738 */
739 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
740
741 paFunctions[0].BeginAddress = 0;
742 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
743 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
744
745 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
746 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
747
748 /*
749 * Register it.
750 */
751 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
752 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
753
754 return VINF_SUCCESS;
755}
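/* Note that a single RUNTIME_FUNCTION entry covering [0, cbChunk) works here
   because all recompiled code in the chunk shares the prologue described by
   s_aOpcodes above; BeginAddress, EndAddress and UnwindInfoAddress are all
   RVAs relative to the pvChunk base address passed to RtlAddFunctionTable. */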
756
757
758# else /* !RT_OS_WINDOWS */
759
760/**
761 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
762 */
763DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
764{
765 if (iValue >= 64)
766 {
767 Assert(iValue < 0x2000);
768 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
769 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
770 }
771 else if (iValue >= 0)
772 *Ptr.pb++ = (uint8_t)iValue;
773 else if (iValue > -64)
774 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
775 else
776 {
777 Assert(iValue > -0x2000);
778 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
779 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
780 }
781 return Ptr;
782}
783
784
785/**
786 * Emits an ULEB128 encoded value (up to 64-bit wide).
787 */
788DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
789{
790 while (uValue >= 0x80)
791 {
792 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
793 uValue >>= 7;
794 }
795 *Ptr.pb++ = (uint8_t)uValue;
796 return Ptr;
797}
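/* Worked examples: the signed encoder above turns -8 into the single byte
   0x78 (sign bit 0x40 set) and 300 into 0xac 0x02, while the unsigned encoder
   turns 624485 into 0xe5 0x8e 0x26 -- the classic ULEB128 example from the
   DWARF specification. */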
798
799
800/**
801 * Emits a CFA rule as register @a uReg + offset @a off.
802 */
803DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
804{
805 *Ptr.pb++ = DW_CFA_def_cfa;
806 Ptr = iemDwarfPutUleb128(Ptr, uReg);
807 Ptr = iemDwarfPutUleb128(Ptr, off);
808 return Ptr;
809}
810
811
812/**
813 * Emits a register (@a uReg) save location:
814 * CFA + @a off * data_alignment_factor
815 */
816DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
817{
818 if (uReg < 0x40)
819 *Ptr.pb++ = DW_CFA_offset | uReg;
820 else
821 {
822 *Ptr.pb++ = DW_CFA_offset_extended;
823 Ptr = iemDwarfPutUleb128(Ptr, uReg);
824 }
825 Ptr = iemDwarfPutUleb128(Ptr, off);
826 return Ptr;
827}
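/* Worked example: with the AMD64 rules emitted further down -- DW_CFA_def_cfa
   RBP+16 and a DW_CFA_offset of 1 for the return address column, combined with
   the data alignment factor of -8 from the CIE -- an unwinder computes
   CFA = RBP + 16 and reads the return RIP from CFA + 1 * -8 = RBP + 8, i.e.
   the slot just above the saved RBP of a standard frame. */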
828
829
830# if 0 /* unused */
831/**
832 * Emits a register (@a uReg) save location, using signed offset:
833 * CFA + @a offSigned * data_alignment_factor
834 */
835DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
836{
837 *Ptr.pb++ = DW_CFA_offset_extended_sf;
838 Ptr = iemDwarfPutUleb128(Ptr, uReg);
839 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
840 return Ptr;
841}
842# endif
843
844
845/**
846 * Initializes the unwind info section for non-windows hosts.
847 */
848static int
849iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
850 void *pvChunk, uint32_t idxChunk)
851{
852 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
853 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
854
855 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
856
857 /*
858 * Generate the CIE first.
859 */
860# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
861 uint8_t const iDwarfVer = 3;
862# else
863 uint8_t const iDwarfVer = 4;
864# endif
865 RTPTRUNION const PtrCie = Ptr;
866 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
867 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
868 *Ptr.pb++ = iDwarfVer; /* DWARF version */
869 *Ptr.pb++ = 0; /* Augmentation. */
870 if (iDwarfVer >= 4)
871 {
872 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
873 *Ptr.pb++ = 0; /* Segment selector size. */
874 }
875# ifdef RT_ARCH_AMD64
876 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
877# else
878 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
879# endif
880 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
881# ifdef RT_ARCH_AMD64
882 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
883# elif defined(RT_ARCH_ARM64)
884 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
885# else
886# error "port me"
887# endif
888 /* Initial instructions: */
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
891 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
892 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
893 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
894 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
895 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
896 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
897 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
898# elif defined(RT_ARCH_ARM64)
899# if 1
900 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
901# else
902 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
903# endif
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
916 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
917 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
918# else
919# error "port me"
920# endif
921 while ((Ptr.u - PtrCie.u) & 3)
922 *Ptr.pb++ = DW_CFA_nop;
923 /* Finalize the CIE size. */
924 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
925
926 /*
927 * Generate an FDE for the whole chunk area.
928 */
929# ifdef IEMNATIVE_USE_LIBUNWIND
930 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
931# endif
932 RTPTRUNION const PtrFde = Ptr;
933 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
934 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
935 Ptr.pu32++;
936 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
937 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
938# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
939 *Ptr.pb++ = DW_CFA_nop;
940# endif
941 while ((Ptr.u - PtrFde.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the FDE size. */
944 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
945
946 /* Terminator entry. */
947 *Ptr.pu32++ = 0;
948 *Ptr.pu32++ = 0; /* just to be sure... */
949 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
950
951 /*
952 * Register it.
953 */
954# ifdef IEMNATIVE_USE_LIBUNWIND
955 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
956# else
957 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
958 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
959# endif
960
961# ifdef IEMNATIVE_USE_GDB_JIT
962 /*
963 * Now for telling GDB about this (experimental).
964 *
965 * This seems to work best with ET_DYN.
966 */
967 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
968# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
969 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
970 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
971# else
972 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
973 - pExecMemAllocator->cbHeapBlockHdr;
974 GDBJITSYMFILE * const pSymFile = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
975# endif
976 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
977 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
978
979 RT_ZERO(*pSymFile);
980
981 /*
982 * The ELF header:
983 */
984 pSymFile->EHdr.e_ident[0] = ELFMAG0;
985 pSymFile->EHdr.e_ident[1] = ELFMAG1;
986 pSymFile->EHdr.e_ident[2] = ELFMAG2;
987 pSymFile->EHdr.e_ident[3] = ELFMAG3;
988 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
989 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
990 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
991 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
992# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
993 pSymFile->EHdr.e_type = ET_DYN;
994# else
995 pSymFile->EHdr.e_type = ET_REL;
996# endif
997# ifdef RT_ARCH_AMD64
998 pSymFile->EHdr.e_machine = EM_AMD64;
999# elif defined(RT_ARCH_ARM64)
1000 pSymFile->EHdr.e_machine = EM_AARCH64;
1001# else
1002# error "port me"
1003# endif
1004 pSymFile->EHdr.e_version = 1; /*?*/
1005 pSymFile->EHdr.e_entry = 0;
1006# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1007 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1008# else
1009 pSymFile->EHdr.e_phoff = 0;
1010# endif
1011 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1012 pSymFile->EHdr.e_flags = 0;
1013 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1014# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1015 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1016 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1017# else
1018 pSymFile->EHdr.e_phentsize = 0;
1019 pSymFile->EHdr.e_phnum = 0;
1020# endif
1021 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1022 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1023 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1024
1025 uint32_t offStrTab = 0;
1026#define APPEND_STR(a_szStr) do { \
1027 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1028 offStrTab += sizeof(a_szStr); \
1029 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1030 } while (0)
1031#define APPEND_STR_FMT(a_szStr, ...) do { \
1032 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1033 offStrTab++; \
1034 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1035 } while (0)
1036
1037 /*
1038 * Section headers.
1039 */
1040 /* Section header #0: NULL */
1041 unsigned i = 0;
1042 APPEND_STR("");
1043 RT_ZERO(pSymFile->aShdrs[i]);
1044 i++;
1045
1046 /* Section header: .eh_frame */
1047 pSymFile->aShdrs[i].sh_name = offStrTab;
1048 APPEND_STR(".eh_frame");
1049 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1050 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1051# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1052 pSymFile->aShdrs[i].sh_offset
1053 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1054# else
1055 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1056 pSymFile->aShdrs[i].sh_offset = 0;
1057# endif
1058
1059 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1060 pSymFile->aShdrs[i].sh_link = 0;
1061 pSymFile->aShdrs[i].sh_info = 0;
1062 pSymFile->aShdrs[i].sh_addralign = 1;
1063 pSymFile->aShdrs[i].sh_entsize = 0;
1064 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1065 i++;
1066
1067 /* Section header: .shstrtab */
1068 unsigned const iShStrTab = i;
1069 pSymFile->EHdr.e_shstrndx = iShStrTab;
1070 pSymFile->aShdrs[i].sh_name = offStrTab;
1071 APPEND_STR(".shstrtab");
1072 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1073 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1074# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1075 pSymFile->aShdrs[i].sh_offset
1076 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1077# else
1078 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1079 pSymFile->aShdrs[i].sh_offset = 0;
1080# endif
1081 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1082 pSymFile->aShdrs[i].sh_link = 0;
1083 pSymFile->aShdrs[i].sh_info = 0;
1084 pSymFile->aShdrs[i].sh_addralign = 1;
1085 pSymFile->aShdrs[i].sh_entsize = 0;
1086 i++;
1087
1088 /* Section header: .symtab */
1089 pSymFile->aShdrs[i].sh_name = offStrTab;
1090 APPEND_STR(".symtab");
1091 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1092 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1093 pSymFile->aShdrs[i].sh_offset
1094 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1096 pSymFile->aShdrs[i].sh_link = iShStrTab;
1097 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1098 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1099 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1100 i++;
1101
1102# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1103 /* Section header: .dynsym */
1104 pSymFile->aShdrs[i].sh_name = offStrTab;
1105 APPEND_STR(".dynsym");
1106 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1107 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1108 pSymFile->aShdrs[i].sh_offset
1109 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1110 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1111 pSymFile->aShdrs[i].sh_link = iShStrTab;
1112 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1113 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1114 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1115 i++;
1116# endif
1117
1118# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1119 /* Section header: .dynamic */
1120 pSymFile->aShdrs[i].sh_name = offStrTab;
1121 APPEND_STR(".dynamic");
1122 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1123 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1124 pSymFile->aShdrs[i].sh_offset
1125 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1126 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1127 pSymFile->aShdrs[i].sh_link = iShStrTab;
1128 pSymFile->aShdrs[i].sh_info = 0;
1129 pSymFile->aShdrs[i].sh_addralign = 1;
1130 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1131 i++;
1132# endif
1133
1134 /* Section header: .text */
1135 unsigned const iShText = i;
1136 pSymFile->aShdrs[i].sh_name = offStrTab;
1137 APPEND_STR(".text");
1138 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1139 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1140# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1141 pSymFile->aShdrs[i].sh_offset
1142 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1143# else
1144 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1145 pSymFile->aShdrs[i].sh_offset = 0;
1146# endif
1147 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1148 pSymFile->aShdrs[i].sh_link = 0;
1149 pSymFile->aShdrs[i].sh_info = 0;
1150 pSymFile->aShdrs[i].sh_addralign = 1;
1151 pSymFile->aShdrs[i].sh_entsize = 0;
1152 i++;
1153
1154 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1155
1156# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1157 /*
1158 * The program headers:
1159 */
1160 /* Everything in a single LOAD segment: */
1161 i = 0;
1162 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1163 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1164 pSymFile->aPhdrs[i].p_offset
1165 = pSymFile->aPhdrs[i].p_vaddr
1166 = pSymFile->aPhdrs[i].p_paddr = 0;
1167 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1168 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1169 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1170 i++;
1171 /* The .dynamic segment. */
1172 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1173 pSymFile->aPhdrs[i].p_flags = PF_R;
1174 pSymFile->aPhdrs[i].p_offset
1175 = pSymFile->aPhdrs[i].p_vaddr
1176 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1177 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1178 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1179 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1180 i++;
1181
1182 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1183
1184 /*
1185 * The dynamic section:
1186 */
1187 i = 0;
1188 pSymFile->aDyn[i].d_tag = DT_SONAME;
1189 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1190 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1191 i++;
1192 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1193 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1194 i++;
1195 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1196 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1197 i++;
1198 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1199 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1200 i++;
1201 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1202 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_NULL;
1205 i++;
1206 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1207# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1208
1209 /*
1210 * Symbol tables:
1211 */
1212 /** @todo gdb doesn't seem to really like this ... */
1213 i = 0;
1214 pSymFile->aSymbols[i].st_name = 0;
1215 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1216 pSymFile->aSymbols[i].st_value = 0;
1217 pSymFile->aSymbols[i].st_size = 0;
1218 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1219 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1220# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1221 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1222# endif
1223 i++;
1224
1225 pSymFile->aSymbols[i].st_name = 0;
1226 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1227 pSymFile->aSymbols[i].st_value = 0;
1228 pSymFile->aSymbols[i].st_size = 0;
1229 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1230 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1231 i++;
1232
1233 pSymFile->aSymbols[i].st_name = offStrTab;
1234 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1235# if 0
1236 pSymFile->aSymbols[i].st_shndx = iShText;
1237 pSymFile->aSymbols[i].st_value = 0;
1238# else
1239 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1240 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1241# endif
1242 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1246 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1247 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1248# endif
1249 i++;
1250
1251 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1252 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1253
1254 /*
1255 * The GDB JIT entry and informing GDB.
1256 */
1257 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1258# if 1
1259 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1260# else
1261 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1262# endif
1263
1264 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1265 RTCritSectEnter(&g_IemNativeGdbJitLock);
1266 pEhFrame->GdbJitEntry.pNext = NULL;
1267 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1268 if (__jit_debug_descriptor.pTail)
1269 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1270 else
1271 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1272 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1273 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1274
1275 /* Notify GDB: */
1276 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1277 __jit_debug_register_code();
1278 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1279 RTCritSectLeave(&g_IemNativeGdbJitLock);
1280
1281# else /* !IEMNATIVE_USE_GDB_JIT */
1282 RT_NOREF(pVCpu);
1283# endif /* !IEMNATIVE_USE_GDB_JIT */
1284
1285 return VINF_SUCCESS;
1286}
1287
1288# endif /* !RT_OS_WINDOWS */
1289#endif /* IN_RING3 */
1290
1291
1292/**
1293 * Adds another chunk to the executable memory allocator.
1294 *
1295 * This is used by the init code for the initial allocation and later by the
1296 * regular allocator function when it's out of memory.
1297 */
1298static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1299{
1300 /* Check that we've room for growth. */
1301 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1302 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1303
1304 /* Allocate a chunk. */
1305#ifdef RT_OS_DARWIN
1306 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1307#else
1308 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1309#endif
1310 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1311
1312#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1313 int rc = VINF_SUCCESS;
1314#else
1315 /* Initialize the heap for the chunk. */
1316 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1317 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1318 AssertRC(rc);
1319 if (RT_SUCCESS(rc))
1320 {
1321 /*
1322 * We want the memory to be aligned on 64 byte, so the first time thru
1323 * here we do some exploratory allocations to see how we can achieve this.
1324 * On subsequent runs we only make an initial adjustment allocation, if
1325 * necessary.
1326 *
1327 * Since we own the heap implementation, we know that the internal block
1328 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
329 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1330 * to the size, align up by 64 bytes, and subtract 32 bytes.
1331 *
1332 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
333 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1334 * allocation to force subsequent allocations to return 64 byte aligned
1335 * user areas.
1336 */
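        /* Worked example: with the 32 byte block header, a 200 byte request is
         * adjusted to RT_ALIGN_32(200 + 32, 64) - 32 = 224 bytes, so the user
         * area plus the next block's header end exactly on a 64 byte boundary
         * and the following allocation again gets a 64 byte aligned user area. */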
1337 if (!pExecMemAllocator->cbHeapBlockHdr)
1338 {
1339 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1340 pExecMemAllocator->cbHeapAlignTweak = 64;
1341 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1342 32 /*cbAlignment*/);
1343 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1344
1345 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1346 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1347 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1348 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1349 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1350
1351 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1352 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1353 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1354 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1355 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1356
1357 RTHeapSimpleFree(hHeap, pvTest2);
1358 RTHeapSimpleFree(hHeap, pvTest1);
1359 }
1360 else
1361 {
1362 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1364 }
1365 if (RT_SUCCESS(rc))
1366#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1367 {
1368 /*
1369 * Add the chunk.
1370 *
1371 * This must be done before the unwind init so windows can allocate
1372 * memory from the chunk when using the alternative sub-allocator.
1373 */
1374 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1375#ifdef IN_RING3
1376 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1377#endif
1378#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1379 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1380#else
1381 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1382 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1383 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1384 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1385#endif
1386
1387 pExecMemAllocator->cChunks = idxChunk + 1;
1388 pExecMemAllocator->idxChunkHint = idxChunk;
1389
1390#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1391 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1392 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1393#else
1394 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1395 pExecMemAllocator->cbTotal += cbFree;
1396 pExecMemAllocator->cbFree += cbFree;
1397#endif
1398
1399#ifdef IN_RING3
1400 /*
1401 * Initialize the unwind information (this cannot really fail atm).
1402 * (This sets pvUnwindInfo.)
1403 */
1404 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1405 if (RT_SUCCESS(rc))
1406#endif
1407 {
1408 return VINF_SUCCESS;
1409 }
1410
1411#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1412 /* Just in case the impossible happens, undo the above: */
1413 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1414 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1415 pExecMemAllocator->cChunks = idxChunk;
1416 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1417 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1418 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1419 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1420#endif
1421 }
1422#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1423 }
1424#endif
1425 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1426 RT_NOREF(pVCpu);
1427 return rc;
1428}
1429
1430
1431/**
1432 * Initializes the executable memory allocator for native recompilation on the
1433 * calling EMT.
1434 *
1435 * @returns VBox status code.
1436 * @param pVCpu The cross context virtual CPU structure of the calling
1437 * thread.
1438 * @param cbMax The max size of the allocator.
1439 * @param cbInitial The initial allocator size.
1440 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1441 * dependent).
1442 */
1443int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1444{
1445 /*
1446 * Validate input.
1447 */
1448 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1449 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1450 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1451 || cbChunk == 0
1452 || ( RT_IS_POWER_OF_TWO(cbChunk)
1453 && cbChunk >= _1M
1454 && cbChunk <= _256M
1455 && cbChunk <= cbMax),
1456 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1457 VERR_OUT_OF_RANGE);
1458
1459 /*
1460 * Adjust/figure out the chunk size.
1461 */
1462 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1463 {
1464 if (cbMax >= _256M)
1465 cbChunk = _64M;
1466 else
1467 {
1468 if (cbMax < _16M)
1469 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1470 else
1471 cbChunk = (uint32_t)cbMax / 4;
1472 if (!RT_IS_POWER_OF_TWO(cbChunk))
1473 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1474 }
1475 }
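    /* Worked examples: cbMax >= 256 MB gives 64 MB chunks; cbMax = 64 MB gives
     * 16 MB chunks (64/4 is already a power of two); cbMax = 8 MB gives 4 MB
     * chunks.  A non-power-of-two quotient is rounded up to the next power of
     * two by the RT_BIT_32(ASMBitLastSetU32()) adjustment above. */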
1476
1477 if (cbChunk > cbMax)
1478 cbMax = cbChunk;
1479 else
1480 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1481 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1482 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1483
1484 /*
1485 * Allocate and initialize the allocator instance.
1486 */
1487 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1488#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1489 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1490 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1491 cbNeeded += cbBitmap * cMaxChunks;
1492 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1493 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1494#endif
1495#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1496 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1497 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1498#endif
1499 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1500 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1501 VERR_NO_MEMORY);
1502 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1503 pExecMemAllocator->cbChunk = cbChunk;
1504 pExecMemAllocator->cMaxChunks = cMaxChunks;
1505 pExecMemAllocator->cChunks = 0;
1506 pExecMemAllocator->idxChunkHint = 0;
1507 pExecMemAllocator->cAllocations = 0;
1508 pExecMemAllocator->cbTotal = 0;
1509 pExecMemAllocator->cbFree = 0;
1510 pExecMemAllocator->cbAllocated = 0;
1511#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1512 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1513 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1514 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1515 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1516#endif
1517#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1518 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1519#endif
1520 for (uint32_t i = 0; i < cMaxChunks; i++)
1521 {
1522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1523 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1524 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1525#else
1526 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1527#endif
1528 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1529#ifdef IN_RING0
1530 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1531#else
1532 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1533#endif
1534 }
1535 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1536
1537 /*
1538 * Do the initial allocations.
1539 */
1540 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1541 {
1542 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1543 AssertLogRelRCReturn(rc, rc);
1544 }
1545
1546 pExecMemAllocator->idxChunkHint = 0;
1547
1548 return VINF_SUCCESS;
1549}
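

/*
 * Hypothetical usage sketch (not part of the recompiler): setting up the executable
 * memory allocator with a 256 MB cap and a 4 MB initial allocation, leaving the chunk
 * size to the default heuristics above. The sizes are made up purely for illustration.
 */
#if 0
static int iemExecMemAllocatorInitExample(PVMCPU pVCpu)
{
    /* cbChunk=0 (or UINT32_MAX) lets iemExecMemAllocatorInit derive a power-of-two
       chunk size from cbMax; with cbMax = 256 MB that works out to 64 MB chunks. */
    return iemExecMemAllocatorInit(pVCpu, _256M /*cbMax*/, _4M /*cbInitial*/, 0 /*cbChunk*/);
}
#endif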
1550
1551
1552/*********************************************************************************************************************************
1553* Native Recompilation *
1554*********************************************************************************************************************************/
1555
1556
1557/**
1558 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1559 */
1560IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1561{
1562 pVCpu->iem.s.cInstructions += idxInstr;
1563 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1564}
1565
1566
1567/**
1568 * Used by TB code when it wants to raise a \#GP(0).
1569 */
1570IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1571{
1572 pVCpu->iem.s.cInstructions += idxInstr;
1573 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1574#ifndef _MSC_VER
1575 return VINF_IEM_RAISED_XCPT; /* not reached */
1576#endif
1577}
1578
1579
1580/**
1581 * Reinitializes the native recompiler state.
1582 *
1583 * Called before starting a new recompile job.
1584 */
1585static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1586{
1587 pReNative->cLabels = 0;
1588 pReNative->bmLabelTypes = 0;
1589 pReNative->cFixups = 0;
1590#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1591 pReNative->pDbgInfo->cEntries = 0;
1592#endif
1593 pReNative->pTbOrg = pTb;
1594 pReNative->cCondDepth = 0;
1595 pReNative->uCondSeqNo = 0;
1596 pReNative->uCheckIrqSeqNo = 0;
1597
1598 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1599#if IEMNATIVE_HST_GREG_COUNT < 32
1600 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1601#endif
1602 ;
1603 pReNative->Core.bmHstRegsWithGstShadow = 0;
1604 pReNative->Core.bmGstRegShadows = 0;
1605 pReNative->Core.bmVars = 0;
1606 pReNative->Core.u64ArgVars = UINT64_MAX;
1607
1608 /* Full host register reinit: */
1609 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1610 {
1611 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1612 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1613 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1614 }
1615
1616 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1617 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1618#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1619 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1620#endif
1621#ifdef IEMNATIVE_REG_FIXED_TMP0
1622 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1623#endif
1624 );
1625 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1626 {
1627 fRegs &= ~RT_BIT_32(idxReg);
1628 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1629 }
1630
1631 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1632#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1633 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1634#endif
1635#ifdef IEMNATIVE_REG_FIXED_TMP0
1636 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1637#endif
1638 return pReNative;
1639}
1640
1641
1642/**
1643 * Allocates and initializes the native recompiler state.
1644 *
1645 * This is called the first time an EMT wants to recompile something.
1646 *
1647 * @returns Pointer to the new recompiler state.
1648 * @param pVCpu The cross context virtual CPU structure of the calling
1649 * thread.
1650 * @param pTb The TB that's about to be recompiled.
1651 * @thread EMT(pVCpu)
1652 */
1653static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1654{
1655 VMCPU_ASSERT_EMT(pVCpu);
1656
1657 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1658 AssertReturn(pReNative, NULL);
1659
1660 /*
1661 * Try allocate all the buffers and stuff we need.
1662 */
1663 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1664 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1665 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1666#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1667 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1668#endif
1669 if (RT_LIKELY( pReNative->pInstrBuf
1670 && pReNative->paLabels
1671 && pReNative->paFixups)
1672#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1673 && pReNative->pDbgInfo
1674#endif
1675 )
1676 {
1677 /*
1678 * Set the buffer & array sizes on success.
1679 */
1680 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1681 pReNative->cLabelsAlloc = _8K;
1682 pReNative->cFixupsAlloc = _16K;
1683#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1684 pReNative->cDbgInfoAlloc = _16K;
1685#endif
1686
1687 /*
1688 * Done, just need to save it and reinit it.
1689 */
1690 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1691 return iemNativeReInit(pReNative, pTb);
1692 }
1693
1694 /*
1695 * Failed. Cleanup and return.
1696 */
1697 AssertFailed();
1698 RTMemFree(pReNative->pInstrBuf);
1699 RTMemFree(pReNative->paLabels);
1700 RTMemFree(pReNative->paFixups);
1701#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1702 RTMemFree(pReNative->pDbgInfo);
1703#endif
1704 RTMemFree(pReNative);
1705 return NULL;
1706}
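

/*
 * Sketch of the intended calling pattern (illustrative only, helper name is made up):
 * the recompiler state is allocated lazily the first time an EMT recompiles something
 * and merely reinitialized for subsequent jobs.
 */
#if 0
static PIEMRECOMPILERSTATE iemNativeGetOrInitStateExample(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (pReNative)
        return iemNativeReInit(pReNative, pTb);
    return iemNativeInit(pVCpu, pTb); /* NULL on allocation failure */
}
#endif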
1707
1708
1709/**
1710 * Creates a label
1711 *
1712 * If the label does not yet have a defined position,
1713 * call iemNativeLabelDefine() later to set it.
1714 *
1715 * @returns Label ID. Throws VBox status code on failure, so no need to check
1716 * the return value.
1717 * @param pReNative The native recompile state.
1718 * @param enmType The label type.
1719 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1720 * label is not yet defined (default).
1721 * @param uData Data associated with the label. Only applicable to
1722 * certain type of labels. Default is zero.
1723 */
1724DECL_HIDDEN_THROW(uint32_t)
1725iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1726 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1727{
1728 /*
1729 * Locate existing label definition.
1730 *
1731 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1732 * and uData is zero.
1733 */
1734 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1735 uint32_t const cLabels = pReNative->cLabels;
1736 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1737#ifndef VBOX_STRICT
1738 && offWhere == UINT32_MAX
1739 && uData == 0
1740#endif
1741 )
1742 {
1743 /** @todo Since this is only used for labels with uData = 0, just use a
1744 * lookup array? */
1745 for (uint32_t i = 0; i < cLabels; i++)
1746 if ( paLabels[i].enmType == enmType
1747 && paLabels[i].uData == uData)
1748 {
1749#ifdef VBOX_STRICT
1750 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1751 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1752#endif
1753 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1754 return i;
1755 }
1756 }
1757
1758 /*
1759 * Make sure we've got room for another label.
1760 */
1761 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1762 { /* likely */ }
1763 else
1764 {
1765 uint32_t cNew = pReNative->cLabelsAlloc;
1766 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1767 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1768 cNew *= 2;
1769 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1770 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1771 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1772 pReNative->paLabels = paLabels;
1773 pReNative->cLabelsAlloc = cNew;
1774 }
1775
1776 /*
1777 * Define a new label.
1778 */
1779 paLabels[cLabels].off = offWhere;
1780 paLabels[cLabels].enmType = enmType;
1781 paLabels[cLabels].uData = uData;
1782 pReNative->cLabels = cLabels + 1;
1783
1784 Assert(enmType >= 0 && enmType < 64);
1785 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1786
1787 if (offWhere != UINT32_MAX)
1788 {
1789#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1790 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1791 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1792#endif
1793 }
1794 return cLabels;
1795}
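

/*
 * Hypothetical sketch of the forward-declaration pattern described above: the label is
 * created without a position, branch fixups are recorded against it, and the label is
 * pinned later via iemNativeLabelDefine(). 'enmLabelType' stands in for whichever
 * IEMNATIVELABELTYPE value the caller needs; no particular enum member is assumed and
 * the actual instruction emission is elided.
 */
#if 0
static uint32_t iemNativeLabelForwardExample(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType)
{
    /* Forward declare: offWhere defaults to UINT32_MAX, uData to zero. */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);

    /* ... emit a branch to idxLabel here and record it with iemNativeAddFixup() ... */

    /* Resolve the label once the target native offset is known. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif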
1796
1797
1798/**
1799 * Defines the location of an existing label.
1800 *
1801 * @param pReNative The native recompile state.
1802 * @param idxLabel The label to define.
1803 * @param offWhere The position.
1804 */
1805DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1806{
1807 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1808 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1809 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1810 pLabel->off = offWhere;
1811#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1812 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1813 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1814#endif
1815}
1816
1817
1818/**
1819 * Looks up a label.
1820 *
1821 * @returns Label ID if found, UINT32_MAX if not.
1822 */
1823static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1824 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1825{
1826 Assert(enmType >= 0 && enmType < 64);
1827 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1828 {
1829 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1830 uint32_t const cLabels = pReNative->cLabels;
1831 for (uint32_t i = 0; i < cLabels; i++)
1832 if ( paLabels[i].enmType == enmType
1833 && paLabels[i].uData == uData
1834 && ( paLabels[i].off == offWhere
1835 || offWhere == UINT32_MAX
1836 || paLabels[i].off == UINT32_MAX))
1837 return i;
1838 }
1839 return UINT32_MAX;
1840}
1841
1842
1843/**
1844 * Adds a fixup.
1845 *
1846 * @throws VBox status code (int) on failure.
1847 * @param pReNative The native recompile state.
1848 * @param offWhere The instruction offset of the fixup location.
1849 * @param idxLabel The target label ID for the fixup.
1850 * @param enmType The fixup type.
1851 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1852 */
1853DECL_HIDDEN_THROW(void)
1854iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1855 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1856{
1857 Assert(idxLabel <= UINT16_MAX);
1858 Assert((unsigned)enmType <= UINT8_MAX);
1859
1860 /*
1861 * Make sure we've got room.
1862 */
1863 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1864 uint32_t const cFixups = pReNative->cFixups;
1865 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1866 { /* likely */ }
1867 else
1868 {
1869 uint32_t cNew = pReNative->cFixupsAlloc;
1870 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1871 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1872 cNew *= 2;
1873 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1874 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1875 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1876 pReNative->paFixups = paFixups;
1877 pReNative->cFixupsAlloc = cNew;
1878 }
1879
1880 /*
1881 * Add the fixup.
1882 */
1883 paFixups[cFixups].off = offWhere;
1884 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1885 paFixups[cFixups].enmType = enmType;
1886 paFixups[cFixups].offAddend = offAddend;
1887 pReNative->cFixups = cFixups + 1;
1888}
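

/*
 * Hypothetical sketch tying labels and fixups together: after emitting a branch whose
 * target is not yet known, the branch's code offset is recorded against the label so a
 * final pass can patch it. Both 'enmLabelType' and 'enmFixupType' are placeholders; no
 * specific enum members are assumed and the instruction emission itself is elided.
 */
#if 0
static uint32_t iemNativeBranchWithFixupExample(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                IEMNATIVELABELTYPE enmLabelType, IEMNATIVEFIXUPTYPE enmFixupType)
{
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
    uint32_t const offFixup = off; /* the instruction (or immediate field) that needs patching */
    /* ... emit the branch with a dummy displacement, advancing 'off' ... */
    iemNativeAddFixup(pReNative, offFixup, idxLabel, enmFixupType);
    return off;
}
#endif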
1889
1890
1891/**
1892 * Slow code path for iemNativeInstrBufEnsure.
1893 */
1894DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1895{
1896 /* Double the buffer size till we meet the request. */
1897 uint32_t cNew = pReNative->cInstrBufAlloc;
1898 AssertReturn(cNew > 0, NULL);
1899 do
1900 cNew *= 2;
1901 while (cNew < off + cInstrReq);
1902
1903 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1904#ifdef RT_ARCH_ARM64
1905 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1906#else
1907 uint32_t const cbMaxInstrBuf = _2M;
1908#endif
1909 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1910
1911 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1912 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1913
1914 pReNative->cInstrBufAlloc = cNew;
1915 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1916}
1917
1918#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1919
1920/**
1921 * Grows the static debug info array used during recompilation.
1922 *
1923 * @returns Pointer to the new debug info block; throws VBox status code on
1924 * failure, so no need to check the return value.
1925 */
1926DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1927{
1928 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1929 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1930 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1931 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1932 pReNative->pDbgInfo = pDbgInfo;
1933 pReNative->cDbgInfoAlloc = cNew;
1934 return pDbgInfo;
1935}
1936
1937
1938/**
1939 * Adds a new debug info uninitialized entry, returning the pointer to it.
1940 */
1941DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1942{
1943 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1944 { /* likely */ }
1945 else
1946 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1947 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1948}
1949
1950
1951/**
1952 * Debug Info: Adds a native offset record, if necessary.
1953 */
1954static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1955{
1956 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1957
1958 /*
1959 * Search backwards to see if we've got a similar record already.
1960 */
1961 uint32_t idx = pDbgInfo->cEntries;
1962 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1963 while (idx-- > idxStop)
1964 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1965 {
1966 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1967 return;
1968 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1969 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1970 break;
1971 }
1972
1973 /*
1974 * Add it.
1975 */
1976 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1977 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1978 pEntry->NativeOffset.offNative = off;
1979}
1980
1981
1982/**
1983 * Debug Info: Record info about a label.
1984 */
1985static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
1986{
1987 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1988 pEntry->Label.uType = kIemTbDbgEntryType_Label;
1989 pEntry->Label.uUnused = 0;
1990 pEntry->Label.enmLabel = (uint8_t)enmType;
1991 pEntry->Label.uData = uData;
1992}
1993
1994
1995/**
1996 * Debug Info: Record info about a threaded call.
1997 */
1998static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
1999{
2000 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2001 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2002 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2003 pEntry->ThreadedCall.uUnused = 0;
2004 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2005}
2006
2007
2008/**
2009 * Debug Info: Record info about a new guest instruction.
2010 */
2011static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2012{
2013 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2014 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2015 pEntry->GuestInstruction.uUnused = 0;
2016 pEntry->GuestInstruction.fExec = fExec;
2017}
2018
2019
2020/**
2021 * Debug Info: Record info about guest register shadowing.
2022 */
2023static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2024 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2025{
2026 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2027 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2028 pEntry->GuestRegShadowing.uUnused = 0;
2029 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2030 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2031 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2032}
2033
2034#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2035
2036
2037/*********************************************************************************************************************************
2038* Register Allocator *
2039*********************************************************************************************************************************/
2040
2041/**
2042 * Register parameter indexes (indexed by argument number).
2043 */
2044DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2045{
2046 IEMNATIVE_CALL_ARG0_GREG,
2047 IEMNATIVE_CALL_ARG1_GREG,
2048 IEMNATIVE_CALL_ARG2_GREG,
2049 IEMNATIVE_CALL_ARG3_GREG,
2050#if defined(IEMNATIVE_CALL_ARG4_GREG)
2051 IEMNATIVE_CALL_ARG4_GREG,
2052# if defined(IEMNATIVE_CALL_ARG5_GREG)
2053 IEMNATIVE_CALL_ARG5_GREG,
2054# if defined(IEMNATIVE_CALL_ARG6_GREG)
2055 IEMNATIVE_CALL_ARG6_GREG,
2056# if defined(IEMNATIVE_CALL_ARG7_GREG)
2057 IEMNATIVE_CALL_ARG7_GREG,
2058# endif
2059# endif
2060# endif
2061#endif
2062};
2063
2064/**
2065 * Call register masks indexed by argument count.
2066 */
2067DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2068{
2069 0,
2070 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2071 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2072 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2073 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2074 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2075#if defined(IEMNATIVE_CALL_ARG4_GREG)
2076 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2077 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2078# if defined(IEMNATIVE_CALL_ARG5_GREG)
2079 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2080 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2081# if defined(IEMNATIVE_CALL_ARG6_GREG)
2082 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2083 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2084 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2085# if defined(IEMNATIVE_CALL_ARG7_GREG)
2086 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2087 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2088 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2089# endif
2090# endif
2091# endif
2092#endif
2093};
2094
2095/**
2096 * Info about shadowed guest register values.
2097 * @see IEMNATIVEGSTREG
2098 */
2099static struct
2100{
2101 /** Offset in VMCPU. */
2102 uint32_t off;
2103 /** The field size. */
2104 uint8_t cb;
2105 /** Name (for logging). */
2106 const char *pszName;
2107} const g_aGstShadowInfo[] =
2108{
2109#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2110 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2111 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2112 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2113 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2114 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2115 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2116 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2117 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2118 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2119 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2120 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2121 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2122 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2123 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2124 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2125 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2126 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2127 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2128 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
2129 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
2130 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
2131 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
2132 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
2133 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
2134 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2135 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2136 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2137 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2138 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2139 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2140 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2141 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2142 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2143 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2144 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2145 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2146 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2147 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2148 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2149 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2150 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2151 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2152#undef CPUMCTX_OFF_AND_SIZE
2153};
2154AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
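

/*
 * Illustration only (hypothetical helper): the table above gives, for each
 * IEMNATIVEGSTREG, the VMCPU byte offset and size of the shadowed guest field.
 */
#if 0
static uint32_t iemNativeGstRegOffsetExample(IEMNATIVEGSTREG enmGstReg)
{
    /* E.g. for enmGstReg == kIemNativeGstReg_Pc this yields the byte offset of cpum.GstCtx.rip within VMCPU. */
    return g_aGstShadowInfo[enmGstReg].off;
}
#endif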
2155
2156
2157/** Host CPU general purpose register names. */
2158const char * const g_apszIemNativeHstRegNames[] =
2159{
2160#ifdef RT_ARCH_AMD64
2161 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2162#elif defined(RT_ARCH_ARM64)
2163 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2164 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2165#else
2166# error "port me"
2167#endif
2168};
2169
2170
2171DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2172 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2173{
2174 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2175
2176 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2177 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2178 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2179 return (uint8_t)idxReg;
2180}
2181
2182
2183/**
2184 * Locate a register, possibly freeing one up.
2185 *
2186 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2187 * failed.
2188 *
2189 * @returns Host register number on success; throws VBox status code on failure, so no
2190 * need to check the return value.
2191 */
2192static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fAllowVolatile)
2193{
2194 uint32_t fRegMask = fAllowVolatile
2195 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
2196 : IEMNATIVE_HST_GREG_MASK & ~(IEMNATIVE_REG_FIXED_MASK | IEMNATIVE_CALL_VOLATILE_GREG_MASK);
2197
2198 /*
2199 * Try a freed register that's shadowing a guest register
2200 */
2201 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2202 if (fRegs)
2203 {
2204 /** @todo pick better here: */
2205 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2206
2207 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2208 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2209 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2210 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2211
2212 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2213 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2214 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2215 return idxReg;
2216 }
2217
2218 /*
2219 * Try free up a variable that's in a register.
2220 *
2221 * We do two rounds here, first evacuating variables we don't need to be
2222 * saved on the stack, then in the second round move things to the stack.
2223 */
2224 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2225 {
2226 uint32_t fVars = pReNative->Core.bmVars;
2227 while (fVars)
2228 {
2229 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2230 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2231 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2232 && (RT_BIT_32(idxReg) & fRegMask)
2233 && ( iLoop == 0
2234 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2235 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2236 {
2237 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2238 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2239 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2240 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2241 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2242
2243 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2244 {
2245 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
2246 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2247 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff,
2248 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2249 - IEMNATIVE_FP_OFF_STACK_VARS,
2250 idxReg);
2251 }
2252
2253 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2254 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2255 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2256 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2257 return idxReg;
2258 }
2259 fVars &= ~RT_BIT_32(idxVar);
2260 }
2261 }
2262
2263 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_OUT_OF_REGISTERS));
2264}
2265
2266
2267/**
2268 * Moves a variable to a different register or spills it onto the stack.
2269 *
2270 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2271 * kinds can easily be recreated if needed later.
2272 *
2273 * @returns The new code buffer position; throws VBox status code on failure.
2274 * @param pReNative The native recompile state.
2275 * @param off The current code buffer position.
2276 * @param idxVar The variable index.
2277 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2278 * call-volatile registers.
2279 */
2280static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2281 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2282{
2283 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2284 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2285
2286 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2287 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2288 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2289 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2290 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2291 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2292 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2293 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2294
2295
2296 /** @todo Add statistics on this.*/
2297 /** @todo Implement basic variable liveness analysis (python) so variables
2298 * can be freed immediately once no longer used. As it stands, we risk
2299 * trashing registers and stack slots on dead variables. */
2300
2301 /*
2302 * First try move it to a different register, as that's cheaper.
2303 */
2304 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2305 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2306 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2307 if (fRegs)
2308 {
2309 /* Avoid using shadow registers, if possible. */
2310 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2311 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2312 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2313
2314 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2315 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2316 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2317 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2318 if (fGstRegShadows)
2319 {
2320 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2321 while (fGstRegShadows)
2322 {
2323 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows);
2324 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2325
2326 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2327 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2328 }
2329 }
2330
2331 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2332 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2333 }
2334 /*
2335 * Otherwise we must spill the register onto the stack.
2336 */
2337 else
2338 {
2339 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2340 off = iemNativeEmitStoreGprByBp(pReNative, off,
2341 pReNative->Core.aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2342 - IEMNATIVE_FP_OFF_STACK_VARS,
2343 idxRegOld);
2344
2345 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2346 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2347 }
2348
2349 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2350 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2351 return off;
2352}
2353
2354
2355/**
2356 * Allocates a temporary host general purpose register.
2357 *
2358 * This may emit code to save register content onto the stack in order to free
2359 * up a register.
2360 *
2361 * @returns The host register number; throws VBox status code on failure,
2362 * so no need to check the return value.
2363 * @param pReNative The native recompile state.
2364 * @param poff Pointer to the variable with the code buffer position.
2365 * This will be updated if we need to move a variable from
2366 * register to stack in order to satisfy the request.
2367 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2368 * registers (@c true, default) or the other way around
2369 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2370 */
2371DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2372{
2373 /*
2374 * Try find a completely unused register, preferably a call-volatile one.
2375 */
2376 uint8_t idxReg;
2377 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2378 & ~pReNative->Core.bmHstRegsWithGstShadow
2379 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2380 if (fRegs)
2381 {
2382 if (fPreferVolatile)
2383 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2384 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2385 else
2386 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2387 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2388 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2389 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2390 }
2391 else
2392 {
2393 idxReg = iemNativeRegAllocFindFree(pReNative, poff, true /*fAllowVolatile*/);
2394 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2395 }
2396 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2397}
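

/*
 * Hypothetical usage sketch: grab a scratch register, emit code that clobbers it, then
 * hand it back. The emitted instructions themselves are elided.
 */
#if 0
static uint32_t iemNativeTmpRegExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    /* ... emit instructions using idxRegTmp as scratch ... */
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    return off;
}
#endif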
2398
2399
2400/**
2401 * Allocates a temporary register for loading an immediate value into.
2402 *
2403 * This will emit code to load the immediate, unless there happens to be an
2404 * unused register with the value already loaded.
2405 *
2406 * The caller will not modify the returned register, it must be considered
2407 * read-only. Free using iemNativeRegFreeTmpImm.
2408 *
2409 * @returns The host register number; throws VBox status code on failure, so no
2410 * need to check the return value.
2411 * @param pReNative The native recompile state.
2412 * @param poff Pointer to the variable with the code buffer position.
2413 * @param uImm The immediate value that the register must hold upon
2414 * return.
2415 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2416 * registers (@c true, default) or the other way around
2417 * (@c false).
2418 *
2419 * @note Reusing immediate values has not been implemented yet.
2420 */
2421DECL_HIDDEN_THROW(uint8_t)
2422iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2423{
2424 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2425 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2426 return idxReg;
2427}
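

/*
 * Hypothetical usage sketch: load a constant into a read-only scratch register and
 * release it again. The constant is arbitrary.
 */
#if 0
static uint32_t iemNativeTmpImmExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegConst = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000), true /*fPreferVolatile*/);
    /* ... emit instructions that only read idxRegConst ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegConst);
    return off;
}
#endif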
2428
2429
2430/**
2431 * Marks host register @a idxHstReg as containing a shadow copy of guest
2432 * register @a enmGstReg.
2433 *
2434 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2435 * host register before calling.
2436 */
2437DECL_FORCE_INLINE(void)
2438iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2439{
2440 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2441
2442 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2443 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2444 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2445 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2446#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2447 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2448 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2449#else
2450 RT_NOREF(off);
2451#endif
2452}
2453
2454
2455/**
2456 * Clear any guest register shadow claims from @a idxHstReg.
2457 *
2458 * The register does not need to be shadowing any guest registers.
2459 */
2460DECL_FORCE_INLINE(void)
2461iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2462{
2463 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2464 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2465 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2466 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2467
2468#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2469 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2470 if (fGstRegs)
2471 {
2472 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2473 while (fGstRegs)
2474 {
2475 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2476 fGstRegs &= ~RT_BIT_64(iGstReg);
2477 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2478 }
2479 }
2480#else
2481 RT_NOREF(off);
2482#endif
2483
2484 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2485 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2486 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2487}
2488
2489
2490/**
2491 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2492 * to @a idxRegTo.
2493 */
2494DECL_FORCE_INLINE(void)
2495iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2496 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2497{
2498 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2499 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2500 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2501 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2502 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2503
2504 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2505 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2506 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2507#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2508 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2509 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2510#else
2511 RT_NOREF(off);
2512#endif
2513}
2514
2515
2516/**
2517 * Allocates a temporary host general purpose register for keeping a guest
2518 * register value.
2519 *
2520 * If we already have a host register shadowing the guest register value, that
2521 * register is reused; otherwise code is emitted to load it. Code may also
2522 * be emitted if we have to free up a register to satisfy the request.
2523 *
2524 * @returns The host register number; throws VBox status code on failure, so no
2525 * need to check the return value.
2526 * @param pReNative The native recompile state.
2527 * @param poff Pointer to the variable with the code buffer
2528 * position. This will be updated if we need to move a
2529 * variable from register to stack in order to satisfy
2530 * the request.
2531 * @param enmGstReg The guest register that is to be updated.
2532 * @param enmIntendedUse How the caller will be using the host register.
2533 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2534 */
2535DECL_HIDDEN_THROW(uint8_t)
2536iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2537 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2538{
2539 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2540#ifdef LOG_ENABLED
2541 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2542#endif
2543
2544 /*
2545 * First check if the guest register value is already in a host register.
2546 */
2547 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2548 {
2549 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2550 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2551 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2552 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2553
2554 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2555 {
2556 /*
2557 * If the register will trash the guest shadow copy, try find a
2558 * completely unused register we can use instead. If that fails,
2559 * we need to disassociate the host reg from the guest reg.
2560 */
2561 /** @todo would be nice to know if preserving the register is in any way helpful. */
2562 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2563 && ( ~pReNative->Core.bmHstRegs
2564 & ~pReNative->Core.bmHstRegsWithGstShadow
2565 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2566 {
2567 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2568
2569 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2570
2571 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2572 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2573 g_apszIemNativeHstRegNames[idxRegNew]));
2574 idxReg = idxRegNew;
2575 }
2576 else
2577 {
2578 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2579 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2580 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2581 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2582 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2583 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2584 else
2585 {
2586 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2587 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2588 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2589 }
2590 }
2591 }
2592 else
2593 {
2594 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2595 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2596
2597 /*
2598 * Allocate a new register, copy the value and, if updating, the
2599 * guest shadow copy assignment to the new register.
2600 */
2601 /** @todo share register for readonly access. */
2602 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2603
2604 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2605
2606 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2607 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2608 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2609 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2610 else
2611 {
2612 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2613 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2614 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2615 g_apszIemNativeHstRegNames[idxRegNew]));
2616 }
2617 idxReg = idxRegNew;
2618 }
2619
2620#ifdef VBOX_STRICT
2621 /* Strict builds: Check that the value is correct. */
2622 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2623#endif
2624
2625 return idxReg;
2626 }
2627
2628 /*
2629 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
2630 */
2631 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2632
2633 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2634
2635 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2636 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2637 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2638 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2639
2640 return idxRegNew;
2641}
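

/*
 * Hypothetical usage sketch: get a host register shadowing guest RAX for an update,
 * emit the modification, then release the host register. Per the notes above, the
 * shadow association survives iemNativeRegFreeTmp, so later code can reuse the value;
 * the eventual write-back to CPUMCTX is not shown here.
 */
#if 0
static uint32_t iemNativeGuestRegUpdateExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                              (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                              kIemNativeGstRegUse_ForUpdate);
    /* ... emit instructions updating idxRegRax ... */
    iemNativeRegFreeTmp(pReNative, idxRegRax);
    return off;
}
#endif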
2642
2643
2644/**
2645 * Allocates a temporary host general purpose register that already holds the
2646 * given guest register value.
2647 *
2648 * The use case for this function is places where the shadowing state cannot be
2649 * modified due to branching and such. This will fail if we don't have a
2650 * current shadow copy handy or if it's incompatible. The only code that will
2651 * be emitted here is value checking code in strict builds.
2652 *
2653 * The intended use can only be readonly!
2654 *
2655 * @returns The host register number, UINT8_MAX if not present.
2656 * @param pReNative The native recompile state.
2657 * @param poff Pointer to the instruction buffer offset.
2658 * Will be updated in strict builds if a register is
2659 * found.
2660 * @param enmGstReg The guest register whose shadow copy is wanted (read-only).
2661 * @note In strict builds, this may throw instruction buffer growth failures.
2662 * Non-strict builds will not throw anything.
2663 * @sa iemNativeRegAllocTmpForGuestReg
2664 */
2665DECL_HIDDEN_THROW(uint8_t)
2666iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2667{
2668 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2669
2670 /*
2671 * First check if the guest register value is already in a host register.
2672 */
2673 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2674 {
2675 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2676 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2677 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2678 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2679
2680 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2681 {
2682 /*
2683 * We only do readonly use here, so easy compared to the other
2684 * variant of this code.
2685 */
2686 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2687 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2688 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2689 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2690 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2691
2692#ifdef VBOX_STRICT
2693 /* Strict builds: Check that the value is correct. */
2694 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2695#else
2696 RT_NOREF(poff);
2697#endif
2698 return idxReg;
2699 }
2700 }
2701
2702 return UINT8_MAX;
2703}
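

/*
 * Hypothetical usage sketch: opportunistically use an existing shadow copy of the guest
 * EFLAGS and fall back to another strategy when none is available.
 */
#if 0
static uint32_t iemNativeGuestRegIfPresentExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
    if (idxRegEfl != UINT8_MAX)
    {
        /* ... read-only use of idxRegEfl ... */
        iemNativeRegFreeTmp(pReNative, idxRegEfl);
    }
    else
    {
        /* ... no shadow copy handy; e.g. load eflags from CPUMCTX into a plain temporary instead ... */
    }
    return off;
}
#endif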
2704
2705
2706DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2707
2708
2709/**
2710 * Allocates argument registers for a function call.
2711 *
2712 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2713 * need to check the return value.
2714 * @param pReNative The native recompile state.
2715 * @param off The current code buffer offset.
2716 * @param cArgs The number of arguments the function call takes.
2717 */
2718DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2719{
2720 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2721 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2722 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2723 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2724
2725 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2726 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2727 else if (cArgs == 0)
2728 return off;
2729
2730 /*
2731 * Are we in luck and all the registers are free and not shadowing anything?
2732 */
2733 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2734 for (uint32_t i = 0; i < cArgs; i++)
2735 {
2736 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2737 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2738 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2739 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2740 }
2741 /*
2742 * Okay, not lucky so we have to free up the registers.
2743 */
2744 else
2745 for (uint32_t i = 0; i < cArgs; i++)
2746 {
2747 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2748 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2749 {
2750 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2751 {
2752 case kIemNativeWhat_Var:
2753 {
2754 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2755 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2756 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2757 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2758 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2759
2760 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2761 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2762 else
2763 {
2764 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2765 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2766 }
2767 break;
2768 }
2769
2770 case kIemNativeWhat_Tmp:
2771 case kIemNativeWhat_Arg:
2772 case kIemNativeWhat_rc:
2773 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2774 default:
2775 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2776 }
2777
2778 }
2779 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2780 {
2781 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2782 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2783 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2784 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2785 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2786 }
2787 else
2788 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2789 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2790 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2791 }
2792 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2793 return off;
2794}
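

/*
 * Hypothetical usage sketch: reserving the first three argument registers before loading
 * them and emitting a helper call (argument loading and the call itself are elided).
 * Per the documentation above, the return value is the updated code buffer offset.
 */
#if 0
static uint32_t iemNativeCallPrepExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeRegAllocArgs(pReNative, off, 3 /*cArgs*/);
    /* ... load IEMNATIVE_CALL_ARG0_GREG thru IEMNATIVE_CALL_ARG2_GREG and emit the call ... */
    return off;
}
#endif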
2795
2796
2797DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2798
2799
2800#if 0
2801/**
2802 * Frees a register assignment of any type.
2803 *
2804 * @param pReNative The native recompile state.
2805 * @param idxHstReg The register to free.
2806 *
2807 * @note Does not update variables.
2808 */
2809DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2810{
2811 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2812 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2813 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2814 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2815 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2816 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2817 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2818 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2819 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2820 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2821 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2822 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2823 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2824 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2825
2826 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2827 /* no flushing, right:
2828 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2829 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2830 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2831 */
2832}
2833#endif
2834
2835
2836/**
2837 * Frees a temporary register.
2838 *
2839 * Any shadow copies of guest registers assigned to the host register will not
2840 * be flushed by this operation.
2841 */
2842DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2843{
2844 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2845 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2846 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2847 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2848 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2849}
2850
2851
2852/**
2853 * Frees a temporary immediate register.
2854 *
2855 * It is assumed that the caller has not modified the register, so it still holds
2856 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2857 */
2858DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2859{
2860 iemNativeRegFreeTmp(pReNative, idxHstReg);
2861}
2862
2863
2864/**
2865 * Called right before emitting a call instruction to move anything important
2866 * out of call-volatile registers, free and flush the call-volatile registers,
2867 * optionally freeing argument variables.
2868 *
2869 * @returns New code buffer offset, UINT32_MAX on failure.
2870 * @param pReNative The native recompile state.
2871 * @param off The code buffer offset.
2872 * @param cArgs The number of arguments the function call takes.
2873 *                      It is presumed that the host registers for these have
2874 *                      already been allocated as such and won't need moving,
2875 *                      just freeing.
2876 * @param fFreeArgVars Whether to free argument variables for the call.
2877 */
2878DECL_HIDDEN_THROW(uint32_t)
2879iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, bool fFreeArgVars)
2880{
2881 /*
2882 * Free argument variables first (simplified).
2883 */
2884 AssertStmt(cArgs <= RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
2885 if (fFreeArgVars && cArgs > 0)
2886 {
2887 for (uint32_t i = 0; i < cArgs; i++)
2888 {
2889 uint8_t idxVar = pReNative->Core.aidxArgVars[i];
2890 if (idxVar < RT_ELEMENTS(pReNative->Core.aVars))
2891 {
2892 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
2893 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2894 Assert( pReNative->Core.aVars[idxVar].idxReg
2895 == (i < RT_ELEMENTS(g_aidxIemNativeCallRegs) ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
2896 }
2897 }
2898 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2899 }
2900
2901 /*
2902 * Move anything important out of volatile registers.
2903 */
2904 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2905 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2906 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2907#ifdef IEMNATIVE_REG_FIXED_TMP0
2908 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2909#endif
2910 & ~g_afIemNativeCallRegs[cArgs];
2911
2912 fRegsToMove &= pReNative->Core.bmHstRegs;
2913 if (!fRegsToMove)
2914 { /* likely */ }
2915 else
2916 while (fRegsToMove != 0)
2917 {
2918 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2919 fRegsToMove &= ~RT_BIT_32(idxReg);
2920
2921 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2922 {
2923 case kIemNativeWhat_Var:
2924 {
2925 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2926 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2927 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2928 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2929 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2930 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2931 else
2932 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2933 continue;
2934 }
2935
2936 case kIemNativeWhat_Arg:
2937 AssertMsgFailed(("What?!?: %u\n", idxReg));
2938 continue;
2939
2940 case kIemNativeWhat_rc:
2941 case kIemNativeWhat_Tmp:
2942 AssertMsgFailed(("Missing free: %u\n", idxReg));
2943 continue;
2944
2945 case kIemNativeWhat_FixedTmp:
2946 case kIemNativeWhat_pVCpuFixed:
2947 case kIemNativeWhat_pCtxFixed:
2948 case kIemNativeWhat_FixedReserved:
2949 case kIemNativeWhat_Invalid:
2950 case kIemNativeWhat_End:
2951 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
2952 }
2953 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
2954 }
2955
2956 /*
2957 * Do the actual freeing.
2958 */
2959 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2960
2961 /* If there are guest register shadows in any call-volatile register, we
2962       have to clear the corresponding guest register masks for each register. */
2963 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2964 if (fHstRegsWithGstShadow)
2965 {
2966 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
2967 do
2968 {
2969 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
2970            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2971
2972 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2973 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2974 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2975 } while (fHstRegsWithGstShadow != 0);
2976 }
2977
2978 return off;
2979}
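
/* Editor's usage sketch (disabled, not part of the build): the typical emit
   sequence around a helper call, mirroring what iemNativeEmitCImplCall and
   iemNativeEmitThreadedCall further down do.  The function name, pfnSomeHelper
   and idxInstr are illustrative placeholders only. */
#if 0
static uint32_t iemNativeEmitExampleHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
                                               uintptr_t pfnSomeHelper)
{
    /* Flush all guest register shadows and vacate the call-volatile registers. */
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 1 /*cArgs*/, false /*fFreeArgVars*/);

    /* Load the argument(s) - here just pVCpu - and make the call. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, pfnSomeHelper);

    /* Propagate a non-zero status code or rcPassUp back to the execution loop. */
    return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
}
#endif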
2980
2981
2982/**
2983 * Flushes a set of guest register shadow copies.
2984 *
2985 * This is usually done after calling a threaded function or a C-implementation
2986 * of an instruction.
2987 *
2988 * @param pReNative The native recompile state.
2989 * @param fGstRegs Set of guest registers to flush.
2990 */
2991DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
2992{
2993 /*
2994 * Reduce the mask by what's currently shadowed
2995 */
2996 fGstRegs &= pReNative->Core.bmGstRegShadows;
2997 if (fGstRegs)
2998 {
2999 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3000 if (pReNative->Core.bmGstRegShadows)
3001 {
3002 /*
3003 * Partial.
3004 */
3005 do
3006 {
3007 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3008 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3009 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3010 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3011 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3012
3013 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3014 fGstRegs &= ~fInThisHstReg;
3015 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= fInThisHstReg;
3016 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3017 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3018 } while (fGstRegs != 0);
3019 }
3020 else
3021 {
3022 /*
3023 * Clear all.
3024 */
3025 do
3026 {
3027 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3028 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3029 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3030 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3031 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3032
3033 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3034 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3035 } while (fGstRegs != 0);
3036 pReNative->Core.bmHstRegsWithGstShadow = 0;
3037 }
3038 }
3039}
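
/* Editor's sketch (disabled): the shadow-tracking invariant this function helps
   maintain, as suggested by the asserts in iemNativeRegFree above.  The
   function name is made up for illustration only. */
#if 0
static void iemNativeRegAssertShadowInvariant(PIEMRECOMPILERSTATE pReNative)
{
    uint64_t fGstShadows     = 0;
    uint32_t fHstWithShadows = 0;
    for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
    {
        fGstShadows |= pReNative->Core.aHstRegs[i].fGstRegShadows;
        if (pReNative->Core.aHstRegs[i].fGstRegShadows)
            fHstWithShadows |= RT_BIT_32(i);
    }
    Assert(fGstShadows     == pReNative->Core.bmGstRegShadows);
    Assert(fHstWithShadows == pReNative->Core.bmHstRegsWithGstShadow);
}
#endif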
3040
3041
3042/**
3043 * Flushes any delayed guest register writes.
3044 *
3045 * This must be called prior to calling CImpl functions and any helpers that use
3046 * the guest state (like raising exceptions) and such.
3047 *
3048 * This optimization has not yet been implemented. The first target would be
3049 * RIP updates, since these are the most common ones.
3050 */
3051DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3052{
3053 RT_NOREF(pReNative, off);
3054 return off;
3055}
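
/* Editor's sketch (disabled): one purely hypothetical shape the delayed RIP
   write flushing could take once implemented.  The fDelayedPcUpdate and
   idxPcShadowReg members are inventions for illustration and do not exist in
   the current IEMRECOMPILERSTATE. */
#if 0
DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWritesSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    if (pReNative->Core.fDelayedPcUpdate)       /* hypothetical flag */
    {
        /* Write the cached PC back to CPUMCTX and clear the pending-write state. */
        off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, pReNative->Core.idxPcShadowReg, /* hypothetical member */
                                             RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
        pReNative->Core.fDelayedPcUpdate = false;
    }
    return off;
}
#endif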
3056
3057
3058/*********************************************************************************************************************************
3059* Code Emitters (larger snippets) *
3060*********************************************************************************************************************************/
3061
3062/**
3063 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3064 * extending to 64-bit width.
3065 *
3066 * @returns New code buffer offset on success, UINT32_MAX on failure.
3067 * @param   pReNative   The native recompile state.
3068 * @param off The current code buffer position.
3069 * @param idxHstReg The host register to load the guest register value into.
3070 * @param enmGstReg The guest register to load.
3071 *
3072 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3073 * that is something the caller needs to do if applicable.
3074 */
3075DECL_HIDDEN_THROW(uint32_t)
3076iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3077{
3078 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3079 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3080
3081 switch (g_aGstShadowInfo[enmGstReg].cb)
3082 {
3083 case sizeof(uint64_t):
3084 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3085 case sizeof(uint32_t):
3086 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3087 case sizeof(uint16_t):
3088 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3089#if 0 /* not present in the table. */
3090 case sizeof(uint8_t):
3091 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3092#endif
3093 default:
3094 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3095 }
3096}
3097
3098
3099#ifdef VBOX_STRICT
3100/**
3101 * Emits code that checks that the content of register @a idxReg is the same
3102 * as what's in the guest register @a enmGstReg, emitting a breakpoint
3103 * instruction if that's not the case.
3104 *
3105 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3106 * Trashes EFLAGS on AMD64.
3107 */
3108static uint32_t
3109iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3110{
3111# ifdef RT_ARCH_AMD64
3112 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3113
3114 /* cmp reg, [mem] */
3115 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3116 {
3117 if (idxReg >= 8)
3118 pbCodeBuf[off++] = X86_OP_REX_R;
3119 pbCodeBuf[off++] = 0x38;
3120 }
3121 else
3122 {
3123 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3124 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3125 else
3126 {
3127 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3128 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3129 else
3130 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3131 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_7));
3132 if (idxReg >= 8)
3133 pbCodeBuf[off++] = X86_OP_REX_R;
3134 }
3135 pbCodeBuf[off++] = 0x39;
3136 }
3137 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3138
3139 /* je/jz +1 */
3140 pbCodeBuf[off++] = 0x74;
3141 pbCodeBuf[off++] = 0x01;
3142
3143 /* int3 */
3144 pbCodeBuf[off++] = 0xcc;
3145
3146 /* For values smaller than the register size, we must check that the rest
3147 of the register is all zeros. */
3148 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3149 {
3150 /* test reg64, imm32 */
3151 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3152 pbCodeBuf[off++] = 0xf7;
3153 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3154 pbCodeBuf[off++] = 0;
3155 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3156 pbCodeBuf[off++] = 0xff;
3157 pbCodeBuf[off++] = 0xff;
3158
3159 /* je/jz +1 */
3160 pbCodeBuf[off++] = 0x74;
3161 pbCodeBuf[off++] = 0x01;
3162
3163 /* int3 */
3164 pbCodeBuf[off++] = 0xcc;
3165 }
3166 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3167 {
3168 /* rol reg64, 32 */
3169 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3170 pbCodeBuf[off++] = 0xc1;
3171 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3172 pbCodeBuf[off++] = 32;
3173
3174 /* test reg32, ffffffffh */
3175 if (idxReg >= 8)
3176 pbCodeBuf[off++] = X86_OP_REX_B;
3177 pbCodeBuf[off++] = 0xf7;
3178 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3179 pbCodeBuf[off++] = 0xff;
3180 pbCodeBuf[off++] = 0xff;
3181 pbCodeBuf[off++] = 0xff;
3182 pbCodeBuf[off++] = 0xff;
3183
3184 /* je/jz +1 */
3185 pbCodeBuf[off++] = 0x74;
3186 pbCodeBuf[off++] = 0x01;
3187
3188 /* int3 */
3189 pbCodeBuf[off++] = 0xcc;
3190
3191 /* rol reg64, 32 */
3192 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3193 pbCodeBuf[off++] = 0xc1;
3194 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3195 pbCodeBuf[off++] = 32;
3196 }
3197
3198# elif defined(RT_ARCH_ARM64)
3199 /* mov TMP0, [gstreg] */
3200 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3201
3202 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3203 /* sub tmp0, tmp0, idxReg */
3204 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3205 /* cbz tmp0, +1 */
3206 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, IEMNATIVE_REG_FIXED_TMP0);
3207 /* brk #0x1000+enmGstReg */
3208 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3209
3210# else
3211# error "Port me!"
3212# endif
3213 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3214 return off;
3215}
3216#endif /* VBOX_STRICT */
3217
3218
3219
3220/**
3221 * Emits code for checking the return code of a call and rcPassUp, returning
3222 * from the code if either is non-zero.
3223 */
3224DECL_HIDDEN_THROW(uint32_t)
3225iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3226{
3227#ifdef RT_ARCH_AMD64
3228 /*
3229 * AMD64: eax = call status code.
3230 */
3231
3232 /* edx = rcPassUp */
3233 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3234# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3235 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3236# endif
3237
3238 /* edx = eax | rcPassUp */
3239 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3240 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3241 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3242 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3243
3244 /* Jump to non-zero status return path. */
3245 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3246
3247 /* done. */
3248
3249#elif RT_ARCH_ARM64
3250 /*
3251 * ARM64: w0 = call status code.
3252 */
3253 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3254 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3255
3256 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3257
3258 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3259
3260 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3261 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3262 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
3263
3264#else
3265# error "port me"
3266#endif
3267 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3268 return off;
3269}
3270
3271
3272/**
3273 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3274 * raising a \#GP(0) if it isn't.
3275 *
3276 * @returns New code buffer offset, UINT32_MAX on failure.
3277 * @param pReNative The native recompile state.
3278 * @param off The code buffer offset.
3279 * @param idxAddrReg The host register with the address to check.
3280 * @param idxInstr The current instruction.
3281 */
3282DECL_HIDDEN_THROW(uint32_t)
3283iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3284{
3285 RT_NOREF(idxInstr);
3286
3287 /*
3288 * Make sure we don't have any outstanding guest register writes as we may
3289     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3290 */
3291 off = iemNativeRegFlushPendingWrites(pReNative, off);
3292
3293#ifdef RT_ARCH_AMD64
3294 /*
3295 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3296 * return raisexcpt();
3297     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3298 */
3299 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3300
3301 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3302 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3303 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3304 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3305
3306# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3307 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3308# else
3309 uint32_t const offFixup = off;
3310 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3311 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3312 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3313 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3314# endif
3315
3316 iemNativeRegFreeTmp(pReNative, iTmpReg);
3317
3318#elif defined(RT_ARCH_ARM64)
3319 /*
3320 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3321 * return raisexcpt();
3322 * ----
3323 * mov x1, 0x800000000000
3324 * add x1, x0, x1
3325 * cmp xzr, x1, lsr 48
3326 * and either:
3327 * b.ne .Lraisexcpt
3328 * or:
3329 * b.eq .Lnoexcept
3330 * movz x1, #instruction-number
3331 * b .Lraisexcpt
3332 * .Lnoexcept:
3333 */
3334 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3335
3336 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3337 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3338 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3339
3340# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3341 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3342# else
3343 uint32_t const offFixup = off;
3344 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3345 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3346 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3347 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3348# endif
3349
3350 iemNativeRegFreeTmp(pReNative, iTmpReg);
3351
3352#else
3353# error "Port me"
3354#endif
3355 return off;
3356}
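
/* Editor's sketch (disabled): the canonical-address trick used by the AMD64
   variant above, expressed in plain C with a few worked values.  The function
   name is illustrative only. */
#if 0
static bool iemNativeIsCanonicalSketch(uint64_t uAddr)
{
    /* 0x00007fffffffffff: hi32=0x00007fff, +0x8000 = 0x0000ffff, >>16 = 0 -> canonical.
       0xffff800000000000: hi32=0xffff8000, +0x8000 wraps to 0x00000000, >>16 = 0 -> canonical.
       0x0000800000000000: hi32=0x00008000, +0x8000 = 0x00010000, >>16 = 1 -> not canonical, #GP(0). */
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}
#endif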
3357
3358
3359/**
3360 * Emits code to check if the content of @a idxAddrReg is within the limit of
3361 * idxSegReg, raising a \#GP(0) if it isn't.
3362 *
3363 * @returns New code buffer offset, UINT32_MAX on failure.
3364 * @param pReNative The native recompile state.
3365 * @param off The code buffer offset.
3366 * @param idxAddrReg The host register (32-bit) with the address to
3367 * check.
3368 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3369 * against.
3370 * @param idxInstr The current instruction.
3371 */
3372DECL_HIDDEN_THROW(uint32_t)
3373iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3374 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3375{
3376 /*
3377 * Make sure we don't have any outstanding guest register writes as we may
3378     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3379 */
3380 off = iemNativeRegFlushPendingWrites(pReNative, off);
3381
3382 /** @todo implement expand down/whatnot checking */
3383 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3384
3385 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3386 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3387 kIemNativeGstRegUse_ForUpdate);
3388
3389 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3390
3391#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3392 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3393 RT_NOREF(idxInstr);
3394#else
3395 uint32_t const offFixup = off;
3396 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3397 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3398 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3399 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3400#endif
3401
3402 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3403 return off;
3404}
3405
3406
3407/**
3408 * Emits a call to a CImpl function or something similar.
3409 */
3410static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3411 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3412 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3413{
3414 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3415 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
3416
3417 /*
3418 * Load the parameters.
3419 */
3420#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3421    /* Special-case code for the hidden VBOXSTRICTRC pointer. */
3422 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3423 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3424 if (cAddParams > 0)
3425 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3426 if (cAddParams > 1)
3427 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3428 if (cAddParams > 2)
3429 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3430 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3431
3432#else
3433 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3434 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3435 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3436 if (cAddParams > 0)
3437 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3438 if (cAddParams > 1)
3439 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3440 if (cAddParams > 2)
3441# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3442 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3443# else
3444 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3445# endif
3446#endif
3447
3448 /*
3449 * Make the call.
3450 */
3451 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3452
3453#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3454 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3455#endif
3456
3457 /*
3458 * Check the status code.
3459 */
3460 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3461}
3462
3463
3464/**
3465 * Emits a call to a threaded worker function.
3466 */
3467static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3468{
3469 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3470 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
3471 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3472
3473#ifdef RT_ARCH_AMD64
3474 /* Load the parameters and emit the call. */
3475# ifdef RT_OS_WINDOWS
3476# ifndef VBOXSTRICTRC_STRICT_ENABLED
3477 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3478 if (cParams > 0)
3479 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3480 if (cParams > 1)
3481 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3482 if (cParams > 2)
3483 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3484# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3485 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3486 if (cParams > 0)
3487 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3488 if (cParams > 1)
3489 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3490 if (cParams > 2)
3491 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3492 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3493 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3494# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3495# else
3496 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3497 if (cParams > 0)
3498 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3499 if (cParams > 1)
3500 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3501 if (cParams > 2)
3502 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3503# endif
3504
3505 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3506
3507# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3508 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3509# endif
3510
3511#elif RT_ARCH_ARM64
3512 /*
3513 * ARM64:
3514 */
3515 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3516 if (cParams > 0)
3517 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3518 if (cParams > 1)
3519 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3520 if (cParams > 2)
3521 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3522
3523 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3524
3525#else
3526# error "port me"
3527#endif
3528
3529 /*
3530 * Check the status code.
3531 */
3532 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3533
3534 return off;
3535}
3536
3537
3538/**
3539 * Emits the code at the RaiseGP0 label.
3540 */
3541static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3542{
3543 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3544 if (idxLabel != UINT32_MAX)
3545 {
3546 iemNativeLabelDefine(pReNative, idxLabel, off);
3547
3548 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3549 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3550#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3551 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3552#endif
3553 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3554
3555 /* jump back to the return sequence. */
3556 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3557 }
3558 return off;
3559}
3560
3561
3562/**
3563 * Emits the code at the ReturnWithFlags label (returns
3564 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3565 */
3566static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3567{
3568 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3569 if (idxLabel != UINT32_MAX)
3570 {
3571 iemNativeLabelDefine(pReNative, idxLabel, off);
3572
3573 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3574
3575 /* jump back to the return sequence. */
3576 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3577 }
3578 return off;
3579}
3580
3581
3582/**
3583 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3584 */
3585static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3586{
3587 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3588 if (idxLabel != UINT32_MAX)
3589 {
3590 iemNativeLabelDefine(pReNative, idxLabel, off);
3591
3592 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3593
3594 /* jump back to the return sequence. */
3595 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3596 }
3597 return off;
3598}
3599
3600
3601/**
3602 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3603 */
3604static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3605{
3606 /*
3607 * Generate the rc + rcPassUp fiddling code if needed.
3608 */
3609 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3610 if (idxLabel != UINT32_MAX)
3611 {
3612 iemNativeLabelDefine(pReNative, idxLabel, off);
3613
3614 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3615#ifdef RT_ARCH_AMD64
3616# ifdef RT_OS_WINDOWS
3617# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3618 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3619# endif
3620 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3621 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3622# else
3623 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3624 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3625# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3626 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3627# endif
3628# endif
3629# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3630 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3631# endif
3632
3633#else
3634 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3635 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3636 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3637#endif
3638
3639 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3640 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3641 }
3642 return off;
3643}
3644
3645
3646/**
3647 * Emits a standard epilog.
3648 */
3649static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3650{
3651 *pidxReturnLabel = UINT32_MAX;
3652
3653 /*
3654 * Successful return, so clear the return register (eax, w0).
3655 */
3656 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
3657
3658 /*
3659 * Define label for common return point.
3660 */
3661 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3662 *pidxReturnLabel = idxReturn;
3663
3664 /*
3665 * Restore registers and return.
3666 */
3667#ifdef RT_ARCH_AMD64
3668 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3669
3670    /* Reposition rsp at the r15 restore point. */
3671 pbCodeBuf[off++] = X86_OP_REX_W;
3672 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3673 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3674 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3675
3676 /* Pop non-volatile registers and return */
3677 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3678 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3679 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3680 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3681 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3682 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3683 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3684 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3685# ifdef RT_OS_WINDOWS
3686 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3687 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3688# endif
3689 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3690 pbCodeBuf[off++] = 0xc9; /* leave */
3691 pbCodeBuf[off++] = 0xc3; /* ret */
3692 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3693
3694#elif RT_ARCH_ARM64
3695 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3696
3697    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3698 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3699 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3700 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3701 IEMNATIVE_FRAME_VAR_SIZE / 8);
3702 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3703 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3704 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3705 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3706 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3707 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3708 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3709 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3710 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3711 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3712 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3713 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3714
3715 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3716 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3717 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3718 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3719
3720 /* retab / ret */
3721# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3722 if (1)
3723 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3724 else
3725# endif
3726 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3727
3728#else
3729# error "port me"
3730#endif
3731 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3732
3733 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3734}
3735
3736
3737/**
3738 * Emits a standard prolog.
3739 */
3740static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3741{
3742#ifdef RT_ARCH_AMD64
3743 /*
3744 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3745 * reserving 64 bytes for stack variables plus 4 non-register argument
3746     * slots.  Fixed register assignment: xBX = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU);
3747 *
3748 * Since we always do the same register spilling, we can use the same
3749 * unwind description for all the code.
3750 */
3751 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3752 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3753 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3754 pbCodeBuf[off++] = 0x8b;
3755 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3756 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3757 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3758# ifdef RT_OS_WINDOWS
3759 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3760 pbCodeBuf[off++] = 0x8b;
3761 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3762 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3763 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3764# else
3765 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3766 pbCodeBuf[off++] = 0x8b;
3767 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3768# endif
3769 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3770 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3771 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3772 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3773 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3774 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3775 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3776 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3777
3778 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3779 X86_GREG_xSP,
3780 IEMNATIVE_FRAME_ALIGN_SIZE
3781 + IEMNATIVE_FRAME_VAR_SIZE
3782 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3783 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3784 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3785 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3786 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3787
3788#elif RT_ARCH_ARM64
3789 /*
3790 * We set up a stack frame exactly like on x86, only we have to push the
3791     * return address ourselves here.  We save all non-volatile registers.
3792 */
3793 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3794
3795# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs.  Investigate further, as we have been
3796        * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.  It's
3797        * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
3798        * in any way conditional, so just emit this instruction now and hope for the best... */
3799 /* pacibsp */
3800 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3801# endif
3802
3803 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3804 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3805 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3806 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3807 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3808 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3809 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3810 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3811 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3812 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3813 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3814 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3815 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3816 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3817 /* Save the BP and LR (ret address) registers at the top of the frame. */
3818 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3819 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3820 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3821 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3822 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3823 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3824
3825 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3826 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3827
3828 /* mov r28, r0 */
3829 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3830 /* mov r27, r1 */
3831 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3832
3833#else
3834# error "port me"
3835#endif
3836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3837 return off;
3838}
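
/* Editor's sketch of the resulting AMD64 stack frame (illustrative only; the
 * authoritative values are the IEMNATIVE_FP_OFF_* and IEMNATIVE_FRAME_*
 * constants in the header):
 *      [rbp+08h]       return address
 *      [rbp+00h]       saved rbp
 *      [rbp-08h]       saved rbx (IEMNATIVE_REG_FIXED_PVMCPU)
 *      ...             saved rsi+rdi (Windows only), then r12 thru r15,
 *                      the last push being at IEMNATIVE_FP_OFF_LAST_PUSH
 *      below that      alignment padding, the variable area, stack argument
 *                      slots and (Windows) shadow argument slots down to rsp
 */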
3839
3840
3841
3842/*********************************************************************************************************************************
3843* Emitters for IEM_MC_XXXX and the associated IEM_MC_XXXX recompiler definitions *
3844*********************************************************************************************************************************/
3845
3846#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3847 {
3848
3849/** We have to get to the end in recompilation mode, as otherwise we won't
3850 * generate code for all the IEM_MC_IF_XXX branches. */
3851#define IEM_MC_END() \
3852 } return off
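
/* Editor's sketch (disabled) of how a generated per-instruction emitter uses
   these macros; the function name and cbInstr value are illustrative only,
   while pReNative, off and pCallEntry are the names the macro bodies expect to
   find in scope. */
#if 0
static uint32_t iemNativeRecompFunc_ExampleAdvanceRip(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                      PCIEMTHRDEDCALLENTRY pCallEntry)
{
    RT_NOREF(pCallEntry); /* only needed by the _WITH_FLAGS and relative jump variants below */
    IEM_MC_BEGIN(0, 0, 0, 0);
    IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(2 /*cbInstr*/);
    IEM_MC_END();
}
#endif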
3853
3854
3855/*
3856 * Standalone CImpl deferrals.
3857 */
3858
3859#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3860 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3861
3862
3863#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3864 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3865
3866DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3867 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3868{
3869 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3870}
3871
3872
3873#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3874 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3875
3876DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3877 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3878{
3879 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3880}
3881
3882
3883#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3884 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3885
3886DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3887 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
3888 uint64_t uArg2)
3889{
3890 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3891}
3892
3893
3894/*
3895 * Advancing PC/RIP/EIP/IP.
3896 */
3897
3898/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
3899 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
3900DECL_INLINE_THROW(uint32_t)
3901iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3902{
3903 /*
3904     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
3905     * return with a special status code and make the execution loop deal with
3906     * it.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
3907     * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
3908     * could continue w/o interruption, it will probably drop into the
3909     * debugger, so it's not worth the effort of trying to service it here and
3910     * we just lump it in with the handling of the others.
3911     *
3912     * To simplify the code and the register state management even more (wrt
3913     * the immediate in the AND operation), we always update the flags and skip
3914     * the extra check and its associated conditional jump.
3915 */
3916 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
3917 <= UINT32_MAX);
3918 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3919 kIemNativeGstRegUse_ForUpdate);
3920 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
3921 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
3922 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
3923 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
3924 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
3925
3926 /* Free but don't flush the EFLAGS register. */
3927 iemNativeRegFreeTmp(pReNative, idxEflReg);
3928
3929 return off;
3930}
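
/* Editor's note: in plain C terms the code emitted above roughly corresponds
 * to the interpreter's iemRegFinishClearingRF logic, with fEfl standing for
 * the 32-bit EFLAGS value in CPUMCTX:
 *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          return to the loop via the ReturnWithFlags label (VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
 *      fEfl &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      write fEfl back to CPUMCTX;
 */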
3931
3932
3933#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
3934 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
3935
3936#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
3937 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
3938 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
3939
3940/** Same as iemRegAddToRip64AndFinishingNoFlags. */
3941DECL_INLINE_THROW(uint32_t)
3942iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3943{
3944 /* Allocate a temporary PC register. */
3945 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3946
3947 /* Perform the addition and store the result. */
3948 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
3949 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3950
3951 /* Free but don't flush the PC register. */
3952 iemNativeRegFreeTmp(pReNative, idxPcReg);
3953
3954 return off;
3955}
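
/* Editor's note: the code emitted above is the recompiled equivalent of the
 * interpreter's simple update, roughly:
 *      pVCpu->cpum.GstCtx.rip = pVCpu->cpum.GstCtx.rip + cbInstr;
 * with the new value kept in a shadowing host register so following code can
 * reuse it without reloading. */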
3956
3957
3958#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
3959 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
3960
3961#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
3962 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
3963 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
3964
3965/** Same as iemRegAddToEip32AndFinishingNoFlags. */
3966DECL_INLINE_THROW(uint32_t)
3967iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3968{
3969 /* Allocate a temporary PC register. */
3970 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3971
3972 /* Perform the addition and store the result. */
3973 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3974 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3975
3976 /* Free but don't flush the PC register. */
3977 iemNativeRegFreeTmp(pReNative, idxPcReg);
3978
3979 return off;
3980}
3981
3982
3983#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
3984 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
3985
3986#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
3987 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
3988 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
3989
3990/** Same as iemRegAddToIp16AndFinishingNoFlags. */
3991DECL_INLINE_THROW(uint32_t)
3992iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3993{
3994 /* Allocate a temporary PC register. */
3995 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3996
3997 /* Perform the addition and store the result. */
3998 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3999 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4000 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4001
4002 /* Free but don't flush the PC register. */
4003 iemNativeRegFreeTmp(pReNative, idxPcReg);
4004
4005 return off;
4006}
4007
4008
4009/*
4010 * Changing PC/RIP/EIP/IP with a relative jump.
4011 */
4012
4013#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4014 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4015 (a_enmEffOpSize), pCallEntry->idxInstr)
4016
4017#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4018 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4019 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4020
4021#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4022 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4023 IEMMODE_16BIT, pCallEntry->idxInstr)
4024
4025#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4026 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4027 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4028
4029#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4030 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4031 IEMMODE_64BIT, pCallEntry->idxInstr)
4032
4033#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4034 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4035 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4036
4037/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4038 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4039 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4040DECL_INLINE_THROW(uint32_t)
4041iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4042 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4043{
4044 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4045
4046 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4047 off = iemNativeRegFlushPendingWrites(pReNative, off);
4048
4049 /* Allocate a temporary PC register. */
4050 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4051
4052 /* Perform the addition. */
4053 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4054
4055 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4056 {
4057 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4058 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4059 }
4060 else
4061 {
4062 /* Just truncate the result to 16-bit IP. */
4063 Assert(enmEffOpSize == IEMMODE_16BIT);
4064 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4065 }
4066 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4067
4068 /* Free but don't flush the PC register. */
4069 iemNativeRegFreeTmp(pReNative, idxPcReg);
4070
4071 return off;
4072}
4073
4074
4075#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4076 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4077 (a_enmEffOpSize), pCallEntry->idxInstr)
4078
4079#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4080 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4081 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4082
4083#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4084 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4085 IEMMODE_16BIT, pCallEntry->idxInstr)
4086
4087#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4088 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4089 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4090
4091#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4092 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4093 IEMMODE_32BIT, pCallEntry->idxInstr)
4094
4095#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4096 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4098
4099/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4100 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4101 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4102DECL_INLINE_THROW(uint32_t)
4103iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4104 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4105{
4106 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4107
4108 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4109 off = iemNativeRegFlushPendingWrites(pReNative, off);
4110
4111 /* Allocate a temporary PC register. */
4112 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4113
4114 /* Perform the addition. */
4115 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4116
4117 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4118 if (enmEffOpSize == IEMMODE_16BIT)
4119 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4120
4121 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4122 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4123
4124 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4125
4126 /* Free but don't flush the PC register. */
4127 iemNativeRegFreeTmp(pReNative, idxPcReg);
4128
4129 return off;
4130}
4131
4132
4133#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4134 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4135
4136#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4137 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4138 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4139
4140#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4141 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4142
4143#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4144 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4145 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4146
4147#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4148 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4149
4150#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4151 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4152 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4153
4154/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4155DECL_INLINE_THROW(uint32_t)
4156iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4157 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4158{
4159 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4160 off = iemNativeRegFlushPendingWrites(pReNative, off);
4161
4162 /* Allocate a temporary PC register. */
4163 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4164
4165 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4166 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4167 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4168 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4169 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4170
4171 /* Free but don't flush the PC register. */
4172 iemNativeRegFreeTmp(pReNative, idxPcReg);
4173
4174 return off;
4175}
4176
4177
4178/*
4179 * Conditionals.
4180 */
4181
4182/**
4183 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4184 *
4185 * @returns Pointer to the condition stack entry on success; throws
4186 * VERR_IEM_COND_TOO_DEEPLY_NESTED (longjmp) when nested too deeply.
4187 */
4188DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4189{
4190 uint32_t const idxStack = pReNative->cCondDepth;
4191 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4192
4193 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4194 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4195
4196 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4197 pEntry->fInElse = false;
4198 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4199 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4200
4201 return pEntry;
4202}
4203
4204
4205/**
4206 * Start of the if-block, snapshotting the register and variable state.
4207 */
4208DECL_INLINE_THROW(void)
4209iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4210{
4211 Assert(offIfBlock != UINT32_MAX);
4212 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4213 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4214 Assert(!pEntry->fInElse);
4215
4216 /* Define the start of the IF block if requested or for disassembly purposes. */
4217 if (idxLabelIf != UINT32_MAX)
4218 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4219#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4220 else
4221 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4222#else
4223 RT_NOREF(offIfBlock);
4224#endif
4225
4226 /* Copy the initial state so we can restore it in the 'else' block. */
4227 pEntry->InitialState = pReNative->Core;
4228}
4229
4230
4231#define IEM_MC_ELSE() } while (0); \
4232 off = iemNativeEmitElse(pReNative, off); \
4233 do {
4234
4235/** Emits code related to IEM_MC_ELSE. */
4236DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4237{
4238 /* Check sanity and get the conditional stack entry. */
4239 Assert(off != UINT32_MAX);
4240 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4241 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4242 Assert(!pEntry->fInElse);
4243
4244 /* Jump to the endif */
4245 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4246
4247 /* Define the else label and enter the else part of the condition. */
4248 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4249 pEntry->fInElse = true;
4250
4251 /* Snapshot the core state so we can do a merge at the endif and restore
4252 the snapshot we took at the start of the if-block. */
4253 pEntry->IfFinalState = pReNative->Core;
4254 pReNative->Core = pEntry->InitialState;
4255
4256 return off;
4257}
4258
4259
4260#define IEM_MC_ENDIF() } while (0); \
4261 off = iemNativeEmitEndIf(pReNative, off)
4262
4263/** Emits code related to IEM_MC_ENDIF. */
4264DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4265{
4266 /* Check sanity and get the conditional stack entry. */
4267 Assert(off != UINT32_MAX);
4268 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4269 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4270
4271 /*
4272 * Now we have to find common ground with the other branch's core state: the
4273 * if-block's final state when we're in an else-block, otherwise the initial
4274 * state. Use the lowest common denominator and just drop anything that isn't
4275 * the same in both states.
4276 */
4276 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4277 * which is why we're doing this at the end of the else-block.
4278 * But we'd need more info about future for that to be worth the effort. */
4279 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4280 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4281 {
4282 /* shadow guest stuff first. */
4283 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4284 if (fGstRegs)
4285 {
4286 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4287 do
4288 {
4289 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4290 fGstRegs &= ~RT_BIT_64(idxGstReg);
4291
4292 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4293 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4294 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4295 {
4296 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
4297 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4298 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4299 }
4300 } while (fGstRegs);
4301 }
4302 else
4303 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4304
4305 /* Check variables next. For now we must require them to be identical
4306 or stuff we can recreate. */
4307 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4308 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4309 if (fVars)
4310 {
4311 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4312 do
4313 {
4314 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4315 fVars &= ~RT_BIT_32(idxVar);
4316
4317 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4318 {
4319 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4320 continue;
4321 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4322 {
4323 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4324 if (idxHstReg != UINT8_MAX)
4325 {
4326 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4327 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4328 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4329 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4330 }
4331 continue;
4332 }
4333 }
4334 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4335 continue;
4336
4337 /* Irreconcilable, so drop it. */
4338 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4339 if (idxHstReg != UINT8_MAX)
4340 {
4341 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4342 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4343 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4344 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4345 }
4346 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4347 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4348 } while (fVars);
4349 }
4350
4351 /* Finally, check that the host register allocations match. */
4352 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4353 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4354 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4355 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4356 }
4357
4358 /*
4359 * Define the endif label and maybe the else one if we're still in the 'if' part.
4360 */
4361 if (!pEntry->fInElse)
4362 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4363 else
4364 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4365 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4366
4367 /* Pop the conditional stack.*/
4368 pReNative->cCondDepth -= 1;
4369
4370 return off;
4371}
4372
4373
4374#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4375 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4376 do {
4377
4378/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4379DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4380{
4381 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4382
4383 /* Get the eflags. */
4384 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4385 kIemNativeGstRegUse_ReadOnly);
4386
4387 /* Test and jump. */
4388 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4389
4390 /* Free but don't flush the EFlags register. */
4391 iemNativeRegFreeTmp(pReNative, idxEflReg);
4392
4393 /* Make a copy of the core state now as we start the if-block. */
4394 iemNativeCondStartIfBlock(pReNative, off);
4395
4396 return off;
4397}
4398
4399
4400#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4401 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4402 do {
4403
4404/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4405DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4406{
4407 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4408
4409 /* Get the eflags. */
4410 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4411 kIemNativeGstRegUse_ReadOnly);
4412
4413 /* Test and jump. */
4414 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4415
4416 /* Free but don't flush the EFlags register. */
4417 iemNativeRegFreeTmp(pReNative, idxEflReg);
4418
4419 /* Make a copy of the core state now as we start the if-block. */
4420 iemNativeCondStartIfBlock(pReNative, off);
4421
4422 return off;
4423}
4424
4425
4426#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4427 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4428 do {
4429
4430/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4431DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4432{
4433 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4434
4435 /* Get the eflags. */
4436 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4437 kIemNativeGstRegUse_ReadOnly);
4438
4439 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4440 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4441
4442 /* Test and jump. */
4443 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4444
4445 /* Free but don't flush the EFlags register. */
4446 iemNativeRegFreeTmp(pReNative, idxEflReg);
4447
4448 /* Make a copy of the core state now as we start the if-block. */
4449 iemNativeCondStartIfBlock(pReNative, off);
4450
4451 return off;
4452}
4453
4454
4455#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4456 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4457 do {
4458
4459/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4460DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4461{
4462 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4463
4464 /* Get the eflags. */
4465 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4466 kIemNativeGstRegUse_ReadOnly);
4467
4468 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4469 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4470
4471 /* Test and jump. */
4472 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4473
4474 /* Free but don't flush the EFlags register. */
4475 iemNativeRegFreeTmp(pReNative, idxEflReg);
4476
4477 /* Make a copy of the core state now as we start the if-block. */
4478 iemNativeCondStartIfBlock(pReNative, off);
4479
4480 return off;
4481}
4482
4483
4484#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4485 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4486 do {
4487
4488#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4489 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4490 do {
4491
4492/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4493DECL_INLINE_THROW(uint32_t)
4494iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4495 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4496{
4497 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4498
4499 /* Get the eflags. */
4500 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4501 kIemNativeGstRegUse_ReadOnly);
4502
4503 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4504 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4505
4506 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4507 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4508 Assert(iBitNo1 != iBitNo2);
4509
4510#ifdef RT_ARCH_AMD64
4511 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4512
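    /* idxTmpReg starts out as the fBit1InEfl mask (immediate allocation above): isolate
       bit #1, shift it into bit #2's position and XOR with EFLAGS, so that bit #2 of
       idxTmpReg ends up set exactly when the two flags differ. */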
4513 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4514 if (iBitNo1 > iBitNo2)
4515 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4516 else
4517 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4518 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4519
4520#elif defined(RT_ARCH_ARM64)
4521 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4522 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4523
4524 /* and tmpreg, eflreg, #1<<iBitNo1 */
4525 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4526
4527 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4528 if (iBitNo1 > iBitNo2)
4529 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4530 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4531 else
4532 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4533 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4534
4535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4536
4537#else
4538# error "Port me"
4539#endif
4540
4541 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4542 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4543 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4544
4545 /* Free but don't flush the EFlags and tmp registers. */
4546 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4547 iemNativeRegFreeTmp(pReNative, idxEflReg);
4548
4549 /* Make a copy of the core state now as we start the if-block. */
4550 iemNativeCondStartIfBlock(pReNative, off);
4551
4552 return off;
4553}
4554
4555
4556#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4557 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4558 do {
4559
4560#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4561 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4562 do {
4563
4564/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4565 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4566DECL_INLINE_THROW(uint32_t)
4567iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4568 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4569{
4570 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4571
4572 /* We need an if-block label for the non-inverted variant. */
4573 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4574 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4575
4576 /* Get the eflags. */
4577 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4578 kIemNativeGstRegUse_ReadOnly);
4579
4580 /* Translate the flag masks to bit numbers. */
4581 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4582 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4583
4584 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4585 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4586 Assert(iBitNo1 != iBitNo);
4587
4588 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4589 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4590 Assert(iBitNo2 != iBitNo);
4591 Assert(iBitNo2 != iBitNo1);
4592
4593#ifdef RT_ARCH_AMD64
4594 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4595#elif defined(RT_ARCH_ARM64)
4596 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4597#endif
4598
4599 /* Check for the lone bit first. */
4600 if (!fInverted)
4601 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4602 else
4603 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4604
4605 /* Then extract and compare the other two bits. */
4606#ifdef RT_ARCH_AMD64
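    /* Same isolate-shift-xor sequence as in iemNativeEmitIfEflagsTwoBitsEqual: bit #2 of
       idxTmpReg ends up set exactly when the two flags differ. */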
4607 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4608 if (iBitNo1 > iBitNo2)
4609 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4610 else
4611 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4612 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4613
4614#elif defined(RT_ARCH_ARM64)
4615 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4616
4617 /* and tmpreg, eflreg, #1<<iBitNo1 */
4618 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4619
4620 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4621 if (iBitNo1 > iBitNo2)
4622 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4623 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4624 else
4625 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4626 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4627
4628 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4629
4630#else
4631# error "Port me"
4632#endif
4633
4634 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4635 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4636 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4637
4638 /* Free but don't flush the EFlags and tmp registers. */
4639 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4640 iemNativeRegFreeTmp(pReNative, idxEflReg);
4641
4642 /* Make a copy of the core state now as we start the if-block. */
4643 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4644
4645 return off;
4646}
4647
4648
4649#define IEM_MC_IF_CX_IS_NZ() \
4650 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4651 do {
4652
4653/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4654DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4655{
4656 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4657
4658 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4659 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4660 kIemNativeGstRegUse_ReadOnly);
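    /* Only CX matters here, so test just the low 16 bits and jump to the else-block if they are all zero. */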
4661 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4662 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4663
4664 iemNativeCondStartIfBlock(pReNative, off);
4665 return off;
4666}
4667
4668
4669#define IEM_MC_IF_ECX_IS_NZ() \
4670 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4671 do {
4672
4673#define IEM_MC_IF_RCX_IS_NZ() \
4674 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4675 do {
4676
4677/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4678DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4679{
4680 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4681
4682 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4683 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4684 kIemNativeGstRegUse_ReadOnly);
4685 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4686 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4687
4688 iemNativeCondStartIfBlock(pReNative, off);
4689 return off;
4690}
4691
4692
4693#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4694 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4695 do {
4696
4697#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4698 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4699 do {
4700
4701/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and
4702 * IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4702DECL_INLINE_THROW(uint32_t)
4703iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4704{
4705 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4706
4707 /* We have to load both RCX and EFLAGS before we can start branching,
4708 otherwise we'll end up in the else-block with an inconsistent
4709 register allocator state.
4710 Doing EFLAGS first as it's more likely to be loaded, right? */
4711 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4712 kIemNativeGstRegUse_ReadOnly);
4713 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4714 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4715 kIemNativeGstRegUse_ReadOnly);
4716
4717 /** @todo we could reduce this to a single branch instruction by spending a
4718 * temporary register and some setnz stuff. Not sure if loops are
4719 * worth it. */
4720 /* Check CX. */
4721 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4722
4723 /* Check the EFlags bit. */
4724 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4725 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4726 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4727 !fCheckIfSet /*fJmpIfSet*/);
4728
4729 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4730 iemNativeRegFreeTmp(pReNative, idxEflReg);
4731
4732 iemNativeCondStartIfBlock(pReNative, off);
4733 return off;
4734}
4735
4736
4737#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4738 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
4739 do {
4740
4741#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4742 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
4743 do {
4744
4745#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4746 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
4747 do {
4748
4749#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4750 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
4751 do {
4752
4753/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
4754 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
4755 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
4756 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4757DECL_INLINE_THROW(uint32_t)
4758iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4759 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
4760{
4761 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4762
4763 /* We have to load both RCX and EFLAGS before we can start branching,
4764 otherwise we'll end up in the else-block with an inconsistent
4765 register allocator state.
4766 Doing EFLAGS first as it's more likely to be loaded, right? */
4767 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4768 kIemNativeGstRegUse_ReadOnly);
4769 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4770 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4771 kIemNativeGstRegUse_ReadOnly);
4772
4773 /** @todo we could reduce this to a single branch instruction by spending a
4774 * temporary register and some setnz stuff. Not sure if loops are
4775 * worth it. */
4776 /* Check RCX/ECX. */
4777 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4778
4779 /* Check the EFlags bit. */
4780 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4781 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4782 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4783 !fCheckIfSet /*fJmpIfSet*/);
4784
4785 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4786 iemNativeRegFreeTmp(pReNative, idxEflReg);
4787
4788 iemNativeCondStartIfBlock(pReNative, off);
4789 return off;
4790}
4791
4792
4793/*
4794 * General purpose register stores.
4795 */
4796
4797#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4798 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4799
4800/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4801DECLINLINE(uint32_t) iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4802{
4803 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4804 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + (iGRegEx & 15)),
4805 kIemNativeGstRegUse_ForUpdate);
4806#ifdef RT_ARCH_AMD64
4807 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
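    /* 12 bytes covers the worst case below: ror reg64,8 (4) + mov reg8,imm8 (up to 3) + rol reg64,8 (4). */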
4808
4809 /* To the lowest byte of the register: mov r8, imm8 */
4810 if (iGRegEx < 16)
4811 {
4812 if (idxGstTmpReg >= 8)
4813 pbCodeBuf[off++] = X86_OP_REX_B;
4814 else if (idxGstTmpReg >= 4)
4815 pbCodeBuf[off++] = X86_OP_REX;
4816 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4817 pbCodeBuf[off++] = u8Value;
4818 }
4819 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
4820 else if (idxGstTmpReg < 4)
4821 {
4822 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4823 pbCodeBuf[off++] = u8Value;
4824 }
4825 else
4826 {
4827 /* ror reg64, 8 */
4828 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4829 pbCodeBuf[off++] = 0xc1;
4830 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4831 pbCodeBuf[off++] = 8;
4832
4833 /* mov reg8, imm8 */
4834 if (idxGstTmpReg >= 8)
4835 pbCodeBuf[off++] = X86_OP_REX_B;
4836 else if (idxGstTmpReg >= 4)
4837 pbCodeBuf[off++] = X86_OP_REX;
4838 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4839 pbCodeBuf[off++] = u8Value;
4840
4841 /* rol reg64, 8 */
4842 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4843 pbCodeBuf[off++] = 0xc1;
4844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4845 pbCodeBuf[off++] = 8;
4846 }
4847
4848#elif defined(RT_ARCH_ARM64)
4849 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4850 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4851 if (iGRegEx < 16)
4852 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4853 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4854 else
4855 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4856 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4857 iemNativeRegFreeTmp(pReNative, idxImmReg);
4858
4859#else
4860# error "Port me!"
4861#endif
4862
4863 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4864
4865 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4866
4867 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4868 return off;
4869}
4870
4871
4872/*
4873 * General purpose register manipulation (add, sub).
4874 */
4875
4876#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4877 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4878
4879/** Emits code for IEM_MC_SUB_GREG_U16. */
4880DECLINLINE(uint32_t) iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4881{
4882 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4883 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
4884 kIemNativeGstRegUse_ForUpdate);
4885
4886#ifdef RT_ARCH_AMD64
4887 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4888 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4889 if (idxGstTmpReg >= 8)
4890 pbCodeBuf[off++] = X86_OP_REX_B;
4891 if (uSubtrahend == 1)
4892 {
4893 pbCodeBuf[off++] = 0xff; /* dec */
4894 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4895 }
4896 else
4897 {
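        /* sub r/m16, imm16 */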
4898 pbCodeBuf[off++] = 0x81;
4899 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4900 pbCodeBuf[off++] = uSubtrahend;
4901 pbCodeBuf[off++] = 0;
4902 }
4903
4904#else
4905 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4906 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4907
4908 /* sub tmp, gstgrp, uSubtrahend */
4909 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4910
4911 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4912 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4913
4914 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4915#endif
4916
4917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4918
4919 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4920
4921 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4922 return off;
4923}
4924
4925
4926#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4927 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4928
4929#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4930 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4931
4932/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4933DECL_INLINE_THROW(uint32_t)
4934iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4935{
4936 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4937 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
4938 kIemNativeGstRegUse_ForUpdate);
4939
4940#ifdef RT_ARCH_AMD64
4941 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4942 if (f64Bit)
4943 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4944 else if (idxGstTmpReg >= 8)
4945 pbCodeBuf[off++] = X86_OP_REX_B;
4946 if (uSubtrahend == 1)
4947 {
4948 /* dec */
4949 pbCodeBuf[off++] = 0xff;
4950 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4951 }
4952 else if (uSubtrahend < 128)
4953 {
4954 pbCodeBuf[off++] = 0x83; /* sub */
4955 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4956 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4957 }
4958 else
4959 {
4960 pbCodeBuf[off++] = 0x81; /* sub */
4961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4962 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4963 pbCodeBuf[off++] = 0;
4964 pbCodeBuf[off++] = 0;
4965 pbCodeBuf[off++] = 0;
4966 }
4967
4968#else
4969 /* sub gstgrp, gstgrp, uSubtrahend - unlike the 16-bit variant we can write straight back. */
4970 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4971 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4972
4973#endif
4974
4975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4976
4977 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4978
4979 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4980 return off;
4981}
4982
4983
4984
4985/*********************************************************************************************************************************
4986* Builtin functions *
4987*********************************************************************************************************************************/
4988
4989/**
4990 * Built-in function that calls a C-implementation function taking zero arguments.
4991 */
4992static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
4993{
4994 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
4995 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
4996 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
4997}
4998
4999
5000/**
5001 * Built-in function that checks for pending interrupts that can be delivered or
5002 * forced action flags.
5003 *
5004 * This triggers after the completion of an instruction, so EIP is already at
5005 * the next instruction. If an IRQ or important FF is pending, this will return
5006 * a non-zero status that stops TB execution.
5007 */
5008static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
5009{
5010 RT_NOREF(pCallEntry);
5011
5012 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
5013 and I'm too lazy to create a 'Fixed' version of that one. */
5014 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
5015 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
5016
5017 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
5018
5019 /* Again, we need to load the extended EFLAGS before we actually need them
5020 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
5021 loaded them inside the check, as the shadow state would not be correct
5022 when the code branches before the load. Ditto PC. */
5023 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5024 kIemNativeGstRegUse_ReadOnly);
5025
5026 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
5027
5028 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5029
5030 /*
5031 * Start by checking the local forced actions of the EMT we're on for IRQs
5032 * and other FFs that needs servicing.
5033 */
5034 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
5035 /* Load FFs in to idxTmpReg and AND with all relevant flags. */
5036 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
5037 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5038 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
5039 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
5040 | VMCPU_FF_TLB_FLUSH
5041 | VMCPU_FF_UNHALT ),
5042 true /*fSetFlags*/);
5043 /* If we end up with ZERO in idxTmpReg there is nothing to do.*/
5044 uint32_t const offFixupJumpToVmCheck1 = off;
5045 off = iemNativeEmitJzToFixed(pReNative, off, 0);
5046
5047 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
5048 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
5049 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
5050 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
5051 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
5052 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
5053
5054 /* So, it's only interrupt related FFs and we need to see if IRQs are being
5055 suppressed by the CPU or not. */
5056 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
5057 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
5058 idxLabelReturnBreak);
5059
5060 /* We've got shadow flags set, so we must check that the PC they are valid
5061 for matches our current PC value. */
5062 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
5063 * a register. */
5064 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
5065 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
5066
5067 /*
5068 * Now check the force flags of the VM.
5069 */
5070 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
5071 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
5072 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
5073 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
5074 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
5075 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
5076
5077 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
5078
5079 /*
5080 * We're good, no IRQs or FFs pending.
5081 */
5082 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5083 iemNativeRegFreeTmp(pReNative, idxEflReg);
5084 iemNativeRegFreeTmp(pReNative, idxPcReg);
5085
5086 return off;
5087}
5088
5089
5090/**
5091 * Built-in function checks if IEMCPU::fExec has the expected value.
5092 */
5093static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
5094{
5095 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
5096 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5097
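    /* Load IEMCPU::fExec, mask it with IEMTB_F_KEY_MASK and jump to the ReturnBreak label if it doesn't match the expected (masked) value. */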
5098 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5099 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
5100 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
5101 kIemNativeLabelType_ReturnBreak);
5102 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5103 return off;
5104}
5105
5106
5107
5108/*********************************************************************************************************************************
5109* The native code generator functions for each MC block. *
5110*********************************************************************************************************************************/
5111
5112
5113/*
5114 * Include g_apfnIemNativeRecompileFunctions and associated functions.
5115 *
5116 * This should probably live in its own file later, but let's see what the
5117 * compile times turn out to be first.
5118 */
5119#include "IEMNativeFunctions.cpp.h"
5120
5121
5122
5123/*********************************************************************************************************************************
5124* Recompiler Core. *
5125*********************************************************************************************************************************/
5126
5127
5128/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
5129static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
5130{
5131 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
5132 pDis->cbCachedInstr += cbMaxRead;
5133 RT_NOREF(cbMinRead);
5134 return VERR_NO_DATA;
5135}
5136
5137
5138/**
5139 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
5140 * @returns pszBuf.
5141 * @param fFlags The flags.
5142 * @param pszBuf The output buffer.
5143 * @param cbBuf The output buffer size. At least 32 bytes.
5144 */
5145DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
5146{
5147 Assert(cbBuf >= 32);
5148 static RTSTRTUPLE const s_aModes[] =
5149 {
5150 /* [00] = */ { RT_STR_TUPLE("16BIT") },
5151 /* [01] = */ { RT_STR_TUPLE("32BIT") },
5152 /* [02] = */ { RT_STR_TUPLE("!2!") },
5153 /* [03] = */ { RT_STR_TUPLE("!3!") },
5154 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
5155 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
5156 /* [06] = */ { RT_STR_TUPLE("!6!") },
5157 /* [07] = */ { RT_STR_TUPLE("!7!") },
5158 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
5159 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
5160 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
5161 /* [0b] = */ { RT_STR_TUPLE("!b!") },
5162 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
5163 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
5164 /* [0e] = */ { RT_STR_TUPLE("!e!") },
5165 /* [0f] = */ { RT_STR_TUPLE("!f!") },
5166 /* [10] = */ { RT_STR_TUPLE("!10!") },
5167 /* [11] = */ { RT_STR_TUPLE("!11!") },
5168 /* [12] = */ { RT_STR_TUPLE("!12!") },
5169 /* [13] = */ { RT_STR_TUPLE("!13!") },
5170 /* [14] = */ { RT_STR_TUPLE("!14!") },
5171 /* [15] = */ { RT_STR_TUPLE("!15!") },
5172 /* [16] = */ { RT_STR_TUPLE("!16!") },
5173 /* [17] = */ { RT_STR_TUPLE("!17!") },
5174 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
5175 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
5176 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
5177 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
5178 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
5179 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
5180 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
5181 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
5182 };
5183 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
5184 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
5185 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
5186
5187 pszBuf[off++] = ' ';
5188 pszBuf[off++] = 'C';
5189 pszBuf[off++] = 'P';
5190 pszBuf[off++] = 'L';
5191 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
5192 Assert(off < 32);
5193
5194 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
5195
5196 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
5197 {
5198 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
5199 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
5200 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
5201 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
5202 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
5203 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
5204 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
5205 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
5206 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
5207 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
5208 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
5209 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
5210 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
5211 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
5212 };
5213 if (fFlags)
5214 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
5215 if (s_aFlags[i].fFlag & fFlags)
5216 {
5217 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
5218 pszBuf[off++] = ' ';
5219 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
5220 off += s_aFlags[i].cchName;
5221 fFlags &= ~s_aFlags[i].fFlag;
5222 if (!fFlags)
5223 break;
5224 }
5225 pszBuf[off] = '\0';
5226
5227 return pszBuf;
5228}
5229
5230
5231DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
5232{
5233 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
5234
5235 char szDisBuf[512];
5236 DISSTATE Dis;
5237 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
5238 uint32_t const cNative = pTb->Native.cInstructions;
5239 uint32_t offNative = 0;
5240#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5241 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
5242#endif
5243 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
5244 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
5245 : DISCPUMODE_64BIT;
5246#ifdef RT_ARCH_AMD64
5247 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
5248#elif defined(RT_ARCH_ARM64)
5249 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
5250#else
5251# error "Port me"
5252#endif
5253
5254 /*
5255 * Print TB info.
5256 */
5257 pHlp->pfnPrintf(pHlp,
5258 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
5259 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
5260 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
5261 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
5262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5263 if (pDbgInfo && pDbgInfo->cEntries > 1)
5264 {
5265 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
5266
5267 /*
5268 * This disassembly is driven by the debug info which follows the native
5269 * code and indicates when it starts with the next guest instructions,
5270 * where labels are and such things.
5271 */
5272 uint32_t idxThreadedCall = 0;
5273 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
5274 uint8_t idxRange = UINT8_MAX;
5275 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
5276 uint32_t offRange = 0;
5277 uint32_t offOpcodes = 0;
5278 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
5279 uint32_t const cDbgEntries = pDbgInfo->cEntries;
5280 uint32_t iDbgEntry = 1;
5281 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
5282
5283 while (offNative < cNative)
5284 {
5285 /* If we're at or have passed the point where the next chunk of debug
5286 info starts, process it. */
5287 if (offDbgNativeNext <= offNative)
5288 {
5289 offDbgNativeNext = UINT32_MAX;
5290 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
5291 {
5292 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
5293 {
5294 case kIemTbDbgEntryType_GuestInstruction:
5295 {
5296 /* Did the exec flag change? */
5297 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
5298 {
5299 pHlp->pfnPrintf(pHlp,
5300 " fExec change %#08x -> %#08x %s\n",
5301 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
5302 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
5303 szDisBuf, sizeof(szDisBuf)));
5304 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
5305 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
5306 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
5307 : DISCPUMODE_64BIT;
5308 }
5309
5310 /* New opcode range? We need to fend off a spurious debug info entry here for cases
5311 where the compilation was aborted before the opcode was recorded and the actual
5312 instruction was translated to a threaded call. This may happen when we run out
5313 of ranges, or when some complicated interrupts/FFs are found to be pending or
5314 similar. So, we just deal with it here rather than in the compiler code as it
5315 is a lot simpler to do up here. */
5316 if ( idxRange == UINT8_MAX
5317 || idxRange >= cRanges
5318 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
5319 {
5320 idxRange += 1;
5321 if (idxRange < cRanges)
5322 offRange = 0;
5323 else
5324 continue;
5325 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
5326 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
5327 + (pTb->aRanges[idxRange].idxPhysPage == 0
5328 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
5329 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
5330 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
5331 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
5332 pTb->aRanges[idxRange].idxPhysPage);
5333 }
5334
5335 /* Disassemble the instruction. */
5336 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
5337 uint32_t cbInstr = 1;
5338 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
5339 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
5340 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
5341 if (RT_SUCCESS(rc))
5342 {
5343 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
5344 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
5345 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
5346 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
5347
5348 static unsigned const s_offMarker = 55;
5349 static char const s_szMarker[] = " ; <--- guest";
5350 if (cch < s_offMarker)
5351 {
5352 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
5353 cch = s_offMarker;
5354 }
5355 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
5356 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
5357
5358 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
5359 }
5360 else
5361 {
5362 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
5363 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
5364 cbInstr = 1;
5365 }
5366 GCPhysPc += cbInstr;
5367 offOpcodes += cbInstr;
5368 offRange += cbInstr;
5369 continue;
5370 }
5371
5372 case kIemTbDbgEntryType_ThreadedCall:
5373 pHlp->pfnPrintf(pHlp,
5374 " Call #%u to %s (%u args)%s\n",
5375 idxThreadedCall,
5376 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
5377 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
5378 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
5379 idxThreadedCall++;
5380 continue;
5381
5382 case kIemTbDbgEntryType_GuestRegShadowing:
5383 {
5384 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
5385 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
5386 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
5387 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
5388 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
5389 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
5390 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
5391 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
5392 else
5393 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
5394 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
5395 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
5396 continue;
5397 }
5398
5399 case kIemTbDbgEntryType_Label:
5400 {
5401 const char *pszName = "what_the_fudge";
5402 const char *pszComment = "";
5403 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
5404 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
5405 {
5406 case kIemNativeLabelType_Return:
5407 pszName = "Return";
5408 break;
5409 case kIemNativeLabelType_ReturnBreak:
5410 pszName = "ReturnBreak";
5411 break;
5412 case kIemNativeLabelType_ReturnWithFlags:
5413 pszName = "ReturnWithFlags";
5414 break;
5415 case kIemNativeLabelType_NonZeroRetOrPassUp:
5416 pszName = "NonZeroRetOrPassUp";
5417 break;
5418 case kIemNativeLabelType_RaiseGp0:
5419 pszName = "RaiseGp0";
5420 break;
5421 case kIemNativeLabelType_If:
5422 pszName = "If";
5423 fNumbered = true;
5424 break;
5425 case kIemNativeLabelType_Else:
5426 pszName = "Else";
5427 fNumbered = true;
5428 pszComment = " ; regs state restored pre-if-block";
5429 break;
5430 case kIemNativeLabelType_Endif:
5431 pszName = "Endif";
5432 fNumbered = true;
5433 break;
5434 case kIemNativeLabelType_CheckIrq:
5435 pszName = "CheckIrq_CheckVM";
5436 fNumbered = true;
5437 break;
5438 case kIemNativeLabelType_Invalid:
5439 case kIemNativeLabelType_End:
5440 break;
5441 }
5442 if (fNumbered)
5443 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
5444 else
5445 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
5446 continue;
5447 }
5448
5449 case kIemTbDbgEntryType_NativeOffset:
5450 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
5451 Assert(offDbgNativeNext > offNative);
5452 break;
5453
5454 default:
5455 AssertFailed();
5456 }
5457 iDbgEntry++;
5458 break;
5459 }
5460 }
5461
5462 /*
5463 * Disassemble the next native instruction.
5464 */
5465 uint32_t cbInstr = sizeof(paNative[0]);
5466 int const rc = DISInstr(&paNative[offNative], enmHstCpuMode, &Dis, &cbInstr);
5467 if (RT_SUCCESS(rc))
5468 {
5469# if defined(RT_ARCH_AMD64)
5470 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
5471 {
5472 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
5473 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
5474 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
5475 &paNative[offNative], uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
5476 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
5477 uInfo & 0x8000 ? " - recompiled" : "");
5478 else
5479 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", &paNative[offNative], uInfo, uInfo);
5480 }
5481 else
5482# endif
5483 {
5484 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
5485 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
5486 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
5487 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
5488 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paNative[offNative], szDisBuf);
5489 }
5490 }
5491 else
5492 {
5493# if defined(RT_ARCH_AMD64)
5494 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
5495 &paNative[offNative], RT_MIN(cNative - offNative, 16), &paNative[offNative], rc);
5496# elif defined(RT_ARCH_ARM64)
5497 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
5498 &paNative[offNative], paNative[offNative], rc);
5499# else
5500# error "Port me"
5501# endif
5502 cbInstr = sizeof(paNative[0]);
5503 }
5504 offNative += cbInstr / sizeof(paNative[0]);
5505 }
5506 }
5507 else
5508#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
5509 {
5510 /*
5511 * No debug info, just disassemble the x86 code and then the native code.
5512 */
5513 /* The guest code. */
5514 for (unsigned i = 0; i < pTb->cRanges; i++)
5515 {
5516 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
5517 + (pTb->aRanges[i].idxPhysPage == 0
5518 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
5519 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
5520 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
5521 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
5522 unsigned off = pTb->aRanges[i].offOpcodes;
5523 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
5524 while (off < cbOpcodes)
5525 {
5526 uint32_t cbInstr = 1;
5527 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
5528 &pTb->pabOpcodes[off], cbOpcodes - off,
5529 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
5530 if (RT_SUCCESS(rc))
5531 {
5532 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
5533 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
5534 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
5535 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
5536 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
5537 GCPhysPc += cbInstr;
5538 off += cbInstr;
5539 }
5540 else
5541 {
5542 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
5543 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
5544 break;
5545 }
5546 }
5547 }
5548
5549 /* The native code: */
5550 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
5551 while (offNative < cNative)
5552 {
5553 uint32_t cbInstr = sizeof(paNative[0]);
5554 int const rc = DISInstr(&paNative[offNative], enmHstCpuMode, &Dis, &cbInstr);
5555 if (RT_SUCCESS(rc))
5556 {
5557# if defined(RT_ARCH_AMD64)
5558 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
5559 {
5560 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
5561 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
5562 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
5563 &paNative[offNative], uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
5564 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
5565 uInfo & 0x8000 ? " - recompiled" : "");
5566 else
5567 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", &paNative[offNative], uInfo, uInfo);
5568 }
5569 else
5570# endif
5571 {
5572 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
5573 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
5574 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
5575 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
5576 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paNative[offNative], szDisBuf);
5577 }
5578 }
5579 else
5580 {
5581# if defined(RT_ARCH_AMD64)
5582 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
5583 &paNative[offNative], RT_MIN(cNative - offNative, 16), &paNative[offNative], rc);
5584# elif defined(RT_ARCH_ARM64)
5585 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
5586 &paNative[offNative], paNative[offNative], rc);
5587# else
5588# error "Port me"
5589# endif
5590 cbInstr = sizeof(paNative[0]);
5591 }
5592 offNative += cbInstr / sizeof(paNative[0]);
5593 }
5594 }
5595}
5596
5597
5598/**
5599 * Recompiles the given threaded TB into a native one.
5600 *
5601 * In case of failure the translation block will be returned as-is.
5602 *
5603 * @returns pTb.
5604 * @param pVCpu The cross context virtual CPU structure of the calling
5605 * thread.
5606 * @param pTb The threaded translation to recompile to native.
5607 */
5608DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
5609{
5610 /*
5611     * The first time thru, we allocate the recompiler state; the other times
5612 * we just need to reset it before using it again.
5613 */
5614 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
5615 if (RT_LIKELY(pReNative))
5616 iemNativeReInit(pReNative, pTb);
5617 else
5618 {
5619 pReNative = iemNativeInit(pVCpu, pTb);
5620 AssertReturn(pReNative, pTb);
5621 }
5622
5623 /*
5624     * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
5625     * so that we can abort if an error happens.
5626 */
5627 uint32_t cCallsLeft = pTb->Thrd.cCalls;
5628#ifdef LOG_ENABLED
5629 uint32_t const cCallsOrg = cCallsLeft;
5630#endif
5631 uint32_t off = 0;
5632 int rc = VINF_SUCCESS;
5633 IEMNATIVE_TRY_SETJMP(pReNative, rc)
5634 {
5635 /*
5636 * Emit prolog code (fixed).
5637 */
5638 off = iemNativeEmitProlog(pReNative, off);
5639
5640 /*
5641 * Convert the calls to native code.
5642 */
5643#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5644 int32_t iGstInstr = -1;
5645 uint32_t fExec = pTb->fFlags;
5646#endif
5647 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
5648 while (cCallsLeft-- > 0)
5649 {
5650 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
5651
5652 /*
5653 * Debug info and assembly markup.
5654 */
5655#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
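            /* The CheckMode built-in call carries the updated execution mode flags in its first
               parameter; remember them so the per-instruction debug info records the right mode
               (fExec starts out as pTb->fFlags). */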
5656 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
5657 fExec = pCallEntry->auParams[0];
5658 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5659 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
5660 {
5661 if (iGstInstr < (int32_t)pTb->cInstructions)
5662 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
5663 else
5664 Assert(iGstInstr == pTb->cInstructions);
5665 iGstInstr = pCallEntry->idxInstr;
5666 }
5667 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
5668#elif defined(VBOX_STRICT)
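            /* No debug info in strict builds: plant a NOP marker encoding the call number, the
               recompiled flag (bit 15) and the threaded function index, so the disassembler can
               label each call (see the marker decoding above). */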
5669 off = iemNativeEmitMarker(pReNative, off,
5670 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
5671 pCallEntry->enmFunction));
5672#endif
5673
5674 /*
5675 * Actual work.
5676 */
5677 if (pfnRecom) /** @todo stats on this. */
5678 {
5679 //STAM_COUNTER_INC()
5680 off = pfnRecom(pReNative, off, pCallEntry);
5681 }
5682 else
5683 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
5684 Assert(off <= pReNative->cInstrBufAlloc);
5685 Assert(pReNative->cCondDepth == 0);
5686
5687 /*
5688 * Advance.
5689 */
5690 pCallEntry++;
5691 }
5692
5693 /*
5694 * Emit the epilog code.
5695 */
5696 uint32_t idxReturnLabel;
5697 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
5698
5699 /*
5700 * Generate special jump labels.
5701 */
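        /* Only emit tail code for the special label types that were actually requested,
           as tracked in the bmLabelTypes bitmap. */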
5702 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
5703 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
5704 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
5705 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
5706 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
5707 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
5708 }
5709 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
5710 {
5711 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
5712 return pTb;
5713 }
5714 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
5715 Assert(off <= pReNative->cInstrBufAlloc);
5716
5717 /*
5718     * Make sure all labels have been defined.
5719 */
5720 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
5721#ifdef VBOX_STRICT
5722 uint32_t const cLabels = pReNative->cLabels;
5723 for (uint32_t i = 0; i < cLabels; i++)
5724 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
5725#endif
5726
5727 /*
5728 * Allocate executable memory, copy over the code we've generated.
5729 */
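    /* Flush the list of TBs whose freeing was deferred before allocating; this presumably
       allows their executable memory to be reclaimed and reused here. */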
5730 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
5731 if (pTbAllocator->pDelayedFreeHead)
5732 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
5733
5734 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
5735 AssertReturn(paFinalInstrBuf, pTb);
5736 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
5737
5738 /*
5739 * Apply fixups.
5740 */
5741 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
5742 uint32_t const cFixups = pReNative->cFixups;
5743 for (uint32_t i = 0; i < cFixups; i++)
5744 {
5745 Assert(paFixups[i].off < off);
5746 Assert(paFixups[i].idxLabel < cLabels);
5747 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
5748 switch (paFixups[i].enmType)
5749 {
5750#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
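            /* 32-bit signed displacement patched in at the fixup offset; offsets are in code
               units (bytes on x86/AMD64), and offAddend is presumably set by the emitter to
               account for the rel32 being relative to the end of the instruction. */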
5751 case kIemNativeFixupType_Rel32:
5752 Assert(paFixups[i].off + 4 <= off);
5753 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
5754 continue;
5755
5756#elif defined(RT_ARCH_ARM64)
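            /* Signed displacement in instruction units, written into bits 5 thru 23 - the imm19
               field used by B.cond/CBZ style encodings; the assert matches the +/-2^18 range. */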
5757 case kIemNativeFixupType_RelImm19At5:
5758 {
5759 Assert(paFixups[i].off < off);
5760 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
5761 Assert(offDisp >= -262144 && offDisp < 262144);
5762 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (offDisp << 5);
5763 continue;
5764 }
5765#endif
5766 case kIemNativeFixupType_Invalid:
5767 case kIemNativeFixupType_End:
5768 break;
5769 }
5770 AssertFailed();
5771 }
5772
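    /* Presumably this flips the freshly written buffer from writable to executable and/or
       flushes the host instruction cache before the code is published. */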
5773 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
5774
5775 /*
5776 * Convert the translation block.
5777 */
5778 //RT_BREAKPOINT();
5779 RTMemFree(pTb->Thrd.paCalls);
5780 pTb->Native.paInstructions = paFinalInstrBuf;
5781 pTb->Native.cInstructions = off;
5782 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
5783#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5784    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
5785 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
5786#endif
5787
5788 Assert(pTbAllocator->cThreadedTbs > 0);
5789 pTbAllocator->cThreadedTbs -= 1;
5790 pTbAllocator->cNativeTbs += 1;
5791 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
5792
5793#ifdef LOG_ENABLED
5794 /*
5795 * Disassemble to the log if enabled.
5796 */
5797 if (LogIs3Enabled())
5798 {
5799 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
5800 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
5801 }
5802#endif
5803
5804 return pTb;
5805}
5806