VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103876

Last change on this file since 103876 was 103876, checked in by vboxsync, 9 months ago

VMM/IEM: Reverted silent assertion 'fix' from r162236 as it is wrong (explained this on IRC already). Removed the check of 'off' altogether as it is wrong even if someone defines the label early, say for the first instruction; others may still jump to it from the last instruction in the TB, so the same jump restrictions apply. bugref:10614 bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 399.3 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103876 2024-03-16 02:11:04Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down the configs here to avoid wasting time on unused ones.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using the RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
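/* For instance, a 300 byte request rounds up to 3 units (384 bytes):
 *     cReqUnits = (300 + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 3
 * Note that (1 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) must always equal the unit size. */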
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
218/** Critical section serializing updates to the GDB JIT descriptor list. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity/laziness, they are allocated as one continuous
339 * chunk. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
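    /* Example: a 64 MiB chunk with 128 byte units has 524288 (512K) units and
     * thus 524288 / 64 = 8192 uint64_t bitmap elements. */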
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
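/*
 * Expected usage pattern around this (see the darwin note above); the buffer
 * names are illustrative only:
 *
 *     void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode);      // returns writable memory
 *     if (pv)
 *     {
 *         memcpy(pv, pbCodeBuf, cbCode);                       // emit/copy the native code
 *         iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);   // make it executable (darwin: RX + icache flush)
 *     }
 */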
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
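 /* For example, with cbReq = 300 (and the 32 byte RTHeapSimple block header
  * set up in iemExecMemAllocatorGrow):
  *     alternative allocator:  RT_ALIGN_32(300, 128)          = 384
  *     RTHeapSimple:           RT_ALIGN_32(300 + 32, 64) - 32 = 352
  * so that the 352 user bytes plus the following 32 byte block header end
  * exactly on a 64 byte line. */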
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try to prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here, both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 16 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
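/* Encoding examples for the two helpers above (7 bits per byte, least
 * significant group first, bit 7 set = continuation):
 *     iemDwarfPutLeb128(Ptr, 16)      -> 0x10
 *     iemDwarfPutLeb128(Ptr, -100)    -> 0x9c 0x7f
 *     iemDwarfPutUleb128(Ptr, 624485) -> 0xe5 0x8e 0x26
 */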
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
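/* For instance, with the usual AMD64 DWARF register numbering (RBP = 6, return
 * address column = 16) the helpers above emit:
 *     iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) -> 0x0c 0x06 0x10
 *     iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,   1) -> 0x90 0x01
 * i.e. CFA = RBP + 16 and the return address is saved at CFA + 1 * -8. */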
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952 # if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we have room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
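 /* Rough chunk layout after these tweak allocations (64 byte anchor block,
  * 32 byte block headers, per the note above):
  *     0x00..0x40  heap anchor block
  *     0x40..0x60  header of the 64 byte align tweak allocation
  *     0x60..0xa0  the align tweak allocation itself
  *     0xa0..0xc0  header of the first real allocation
  *     0xc0..      first real allocation, 64 byte aligned as intended. */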
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
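    /* Examples: cbMax = 512M -> cbChunk = 64M, 8 chunks;
     *           cbMax =  40M -> 40M/4 = 10M, rounded up to 16M, cbMax raised to 48M below (3 chunks);
     *           cbMax =   2M -> cbChunk = 2M, a single chunk. */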
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
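/* Rough illustration of the bitmap sizing above, assuming 256-byte allocation
   units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 8): a 64M chunk holds
   64M >> 8 = 256K units, so its allocation bitmap is 256K bits, i.e.
   64M >> (8 + 3) = 32KB, stored as 64M >> (8 + 6) = 4096 uint64_t elements. */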
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
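/* Note: idxInstr is the index of the instruction within the TB at the point of
   the failing call, so the addition above presumably credits the instructions
   already executed before the status code is handed to the common fiddling
   routine. */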
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadedFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695 /* We set fSafeToFree to false because we're being called in the context
1696 of a TB callback function, which for native TBs means we cannot release
1697 the executable memory until we've returned our way back to iemTbExec as
1698 that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
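/* Note on the IEMNATIVE_WITH_TLB_LOOKUP_FETCH pattern used by the helpers below:
   when the define is active the recompiled code presumably does the data TLB
   lookup inline and only calls these helpers on the slow path, hence the *SafeJmp
   variants; otherwise the helpers do the whole job via the regular *Jmp workers. */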
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
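/* Illustration of the cast chain above: a fetched byte of 0x80 becomes
   (int8_t)-128, sign-extends to (int16_t)0xFF80, is reinterpreted as
   (uint16_t)0xFF80 and finally zero-extends to the returned uint64_t value
   0x000000000000FF80.  The wider _Sx_ variants below follow the same scheme. */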
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873/**
1874 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1875 */
1876IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1877{
1878#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1879 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1880#else
1881 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1882#endif
1883}
1884
1885
1886/**
1887 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1888 */
1889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1890{
1891#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1892 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1893#else
1894 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1895#endif
1896}
1897
1898
1899/**
1900 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1901 */
1902IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1903{
1904#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1905 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1906#else
1907 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1908#endif
1909}
1910
1911
1912/**
1913 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1914 */
1915IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1916{
1917#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1918 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1919#else
1920 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1921#endif
1922}
1923
1924
1925
1926/**
1927 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1932 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1933#else
1934 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1935#endif
1936}
1937
1938
1939/**
1940 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1941 */
1942IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1943{
1944#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1945 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1946#else
1947 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1948#endif
1949}
1950
1951
1952/**
1953 * Used by TB code to store a 32-bit selector value onto a generic stack.
1954 *
1955 * Intel CPUs don't write a whole dword, hence the special function.
1956 */
1957IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1958{
1959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1960 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1961#else
1962 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1963#endif
1964}
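/* Example of why the SReg variant exists: 'push fs' with a 32-bit operand size
   on recent Intel CPUs performs only a 16-bit write, leaving the upper word of
   the stack dword unmodified, so a plain 32-bit store would not be faithful. */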
1965
1966
1967/**
1968 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1969 */
1970IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1971{
1972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1973 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1974#else
1975 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1976#endif
1977}
1978
1979
1980/**
1981 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1982 */
1983IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1984{
1985#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1986 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1987#else
1988 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1989#endif
1990}
1991
1992
1993/**
1994 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1999 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2000#else
2001 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2008 */
2009IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2010{
2011#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2012 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2013#else
2014 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2015#endif
2016}
2017
2018
2019
2020/*********************************************************************************************************************************
2021* Helpers: Flat memory fetches and stores. *
2022*********************************************************************************************************************************/
2023
2024/**
2025 * Used by TB code to load unsigned 8-bit data w/ flat address.
2026 * @note Zero extending the value to 64-bit to simplify assembly.
2027 */
2028IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2029{
2030#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2031 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2032#else
2033 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2034#endif
2035}
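/* Note: passing UINT8_MAX as the segment register index to the *SafeJmp workers
   serves as the IEM marker for flat (unsegmented) addressing, which is why the
   flat helpers in this section can share the segmented slow-path workers. */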
2036
2037
2038/**
2039 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2040 * to 16 bits.
2041 * @note Zero extending the value to 64-bit to simplify assembly.
2042 */
2043IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2044{
2045#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2046 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2047#else
2048 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2049#endif
2050}
2051
2052
2053/**
2054 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2055 * to 32 bits.
2056 * @note Zero extending the value to 64-bit to simplify assembly.
2057 */
2058IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2059{
2060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2061 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2062#else
2063 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2064#endif
2065}
2066
2067
2068/**
2069 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2070 * to 64 bits.
2071 */
2072IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2073{
2074#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2075 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2076#else
2077 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2078#endif
2079}
2080
2081
2082/**
2083 * Used by TB code to load unsigned 16-bit data w/ flat address.
2084 * @note Zero extending the value to 64-bit to simplify assembly.
2085 */
2086IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2087{
2088#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2089 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2090#else
2091 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2092#endif
2093}
2094
2095
2096/**
2097 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2098 * to 32 bits.
2099 * @note Zero extending the value to 64-bit to simplify assembly.
2100 */
2101IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2102{
2103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2104 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2105#else
2106 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2107#endif
2108}
2109
2110
2111/**
2112 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2113 * to 64 bits.
2114 * @note Zero extending the value to 64-bit to simplify assembly.
2115 */
2116IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2117{
2118#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2119 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2120#else
2121 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2122#endif
2123}
2124
2125
2126/**
2127 * Used by TB code to load unsigned 32-bit data w/ flat address.
2128 * @note Zero extending the value to 64-bit to simplify assembly.
2129 */
2130IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2131{
2132#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2133 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2134#else
2135 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2136#endif
2137}
2138
2139
2140/**
2141 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2142 * to 64 bits.
2143 * @note Zero extending the value to 64-bit to simplify assembly.
2144 */
2145IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2146{
2147#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2148 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2149#else
2150 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2151#endif
2152}
2153
2154
2155/**
2156 * Used by TB code to load unsigned 64-bit data w/ flat address.
2157 */
2158IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2159{
2160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2161 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2162#else
2163 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2164#endif
2165}
2166
2167
2168/**
2169 * Used by TB code to store unsigned 8-bit data w/ flat address.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2172{
2173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2174 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2175#else
2176 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2177#endif
2178}
2179
2180
2181/**
2182 * Used by TB code to store unsigned 16-bit data w/ flat address.
2183 */
2184IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2185{
2186#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2187 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2188#else
2189 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2190#endif
2191}
2192
2193
2194/**
2195 * Used by TB code to store unsigned 32-bit data w/ flat address.
2196 */
2197IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2198{
2199#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2200 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2201#else
2202 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2203#endif
2204}
2205
2206
2207/**
2208 * Used by TB code to store unsigned 64-bit data w/ flat address.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2213 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2214#else
2215 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2216#endif
2217}
2218
2219
2220
2221/**
2222 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2227 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2228#else
2229 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2236 */
2237IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2238{
2239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2240 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2241#else
2242 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2243#endif
2244}
2245
2246
2247/**
2248 * Used by TB code to store a segment selector value onto a flat stack.
2249 *
2250 * Intel CPUs don't write a whole dword, hence the special function.
2251 */
2252IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2253{
2254#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2255 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2256#else
2257 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2258#endif
2259}
2260
2261
2262/**
2263 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2264 */
2265IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2266{
2267#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2268 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2269#else
2270 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2271#endif
2272}
2273
2274
2275/**
2276 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2277 */
2278IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2279{
2280#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2281 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2282#else
2283 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2284#endif
2285}
2286
2287
2288/**
2289 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2290 */
2291IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2292{
2293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2294 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2295#else
2296 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2297#endif
2298}
2299
2300
2301/**
2302 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2303 */
2304IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2305{
2306#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2307 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2308#else
2309 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2310#endif
2311}
2312
2313
2314
2315/*********************************************************************************************************************************
2316* Helpers: Segmented memory mapping. *
2317*********************************************************************************************************************************/
2318
2319/**
2320 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2321 * segmentation.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2324 RTGCPTR GCPtrMem, uint8_t iSegReg))
2325{
2326#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2327 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2328#else
2329 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2330#endif
2331}
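/* Note: for the mapping helpers in this and the following section, the byte
   written to *pbUnmapInfo is the unmap token which the TB code later hands back
   (as bUnmapInfo) to the iemNativeHlpMemCommitAndUnmap* helpers further down in
   order to commit and/or release the mapping. */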
2332
2333
2334/**
2335 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2336 */
2337IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2338 RTGCPTR GCPtrMem, uint8_t iSegReg))
2339{
2340#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2341 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2342#else
2343 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2344#endif
2345}
2346
2347
2348/**
2349 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2350 */
2351IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2352 RTGCPTR GCPtrMem, uint8_t iSegReg))
2353{
2354#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2355 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2356#else
2357 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2358#endif
2359}
2360
2361
2362/**
2363 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2364 */
2365IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2366 RTGCPTR GCPtrMem, uint8_t iSegReg))
2367{
2368#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2369 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2370#else
2371 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2372#endif
2373}
2374
2375
2376/**
2377 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2378 * segmentation.
2379 */
2380IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2381 RTGCPTR GCPtrMem, uint8_t iSegReg))
2382{
2383#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2384 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2385#else
2386 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2387#endif
2388}
2389
2390
2391/**
2392 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2393 */
2394IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2395 RTGCPTR GCPtrMem, uint8_t iSegReg))
2396{
2397#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2398 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2399#else
2400 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2401#endif
2402}
2403
2404
2405/**
2406 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2407 */
2408IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2409 RTGCPTR GCPtrMem, uint8_t iSegReg))
2410{
2411#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2412 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2413#else
2414 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2415#endif
2416}
2417
2418
2419/**
2420 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2421 */
2422IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2423 RTGCPTR GCPtrMem, uint8_t iSegReg))
2424{
2425#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2426 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2427#else
2428 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2429#endif
2430}
2431
2432
2433/**
2434 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2435 * segmentation.
2436 */
2437IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2438 RTGCPTR GCPtrMem, uint8_t iSegReg))
2439{
2440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2441 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2442#else
2443 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2444#endif
2445}
2446
2447
2448/**
2449 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2450 */
2451IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2452 RTGCPTR GCPtrMem, uint8_t iSegReg))
2453{
2454#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2455 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2456#else
2457 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2458#endif
2459}
2460
2461
2462/**
2463 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2464 */
2465IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2466 RTGCPTR GCPtrMem, uint8_t iSegReg))
2467{
2468#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2469 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2470#else
2471 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2472#endif
2473}
2474
2475
2476/**
2477 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2478 */
2479IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2480 RTGCPTR GCPtrMem, uint8_t iSegReg))
2481{
2482#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2483 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2484#else
2485 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2486#endif
2487}
2488
2489
2490/**
2491 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2492 * segmentation.
2493 */
2494IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2495 RTGCPTR GCPtrMem, uint8_t iSegReg))
2496{
2497#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2498 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2499#else
2500 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2501#endif
2502}
2503
2504
2505/**
2506 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2507 */
2508IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2509 RTGCPTR GCPtrMem, uint8_t iSegReg))
2510{
2511#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2512 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2513#else
2514 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2515#endif
2516}
2517
2518
2519/**
2520 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2521 */
2522IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2523 RTGCPTR GCPtrMem, uint8_t iSegReg))
2524{
2525#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2526 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2527#else
2528 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2529#endif
2530}
2531
2532
2533/**
2534 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2535 */
2536IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2537 RTGCPTR GCPtrMem, uint8_t iSegReg))
2538{
2539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2540 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2541#else
2542 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2543#endif
2544}
2545
2546
2547/**
2548 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2551 RTGCPTR GCPtrMem, uint8_t iSegReg))
2552{
2553#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2554 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2555#else
2556 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2557#endif
2558}
2559
2560
2561/**
2562 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2563 */
2564IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2565 RTGCPTR GCPtrMem, uint8_t iSegReg))
2566{
2567#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2568 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2569#else
2570 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2571#endif
2572}
2573
2574
2575/**
2576 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2577 * segmentation.
2578 */
2579IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2580 RTGCPTR GCPtrMem, uint8_t iSegReg))
2581{
2582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2583 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2584#else
2585 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2586#endif
2587}
2588
2589
2590/**
2591 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2592 */
2593IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2594 RTGCPTR GCPtrMem, uint8_t iSegReg))
2595{
2596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2597 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2598#else
2599 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2600#endif
2601}
2602
2603
2604/**
2605 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2606 */
2607IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2608 RTGCPTR GCPtrMem, uint8_t iSegReg))
2609{
2610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2611 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2612#else
2613 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2614#endif
2615}
2616
2617
2618/**
2619 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2620 */
2621IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2622 RTGCPTR GCPtrMem, uint8_t iSegReg))
2623{
2624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2625 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2626#else
2627 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2628#endif
2629}
2630
2631
2632/*********************************************************************************************************************************
2633* Helpers: Flat memory mapping. *
2634*********************************************************************************************************************************/
2635
2636/**
2637 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2638 * address.
2639 */
2640IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2644#else
2645 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2654{
2655#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2656 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2657#else
2658 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2659#endif
2660}
2661
2662
2663/**
2664 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2665 */
2666IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2667{
2668#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2669 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2670#else
2671 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2672#endif
2673}
2674
2675
2676/**
2677 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2678 */
2679IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2680{
2681#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2682 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2683#else
2684 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2685#endif
2686}
2687
2688
2689/**
2690 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2691 * address.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2694{
2695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2696 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2697#else
2698 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2699#endif
2700}
2701
2702
2703/**
2704 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2705 */
2706IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2707{
2708#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2709 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2710#else
2711 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2712#endif
2713}
2714
2715
2716/**
2717 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2718 */
2719IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2720{
2721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2722 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2723#else
2724 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2725#endif
2726}
2727
2728
2729/**
2730 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2731 */
2732IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2733{
2734#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2735 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2736#else
2737 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2738#endif
2739}
2740
2741
2742/**
2743 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2744 * address.
2745 */
2746IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2747{
2748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2749 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2750#else
2751 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2752#endif
2753}
2754
2755
2756/**
2757 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2758 */
2759IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2760{
2761#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2762 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2763#else
2764 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2765#endif
2766}
2767
2768
2769/**
2770 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2771 */
2772IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2773{
2774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2775 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2776#else
2777 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2778#endif
2779}
2780
2781
2782/**
2783 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2784 */
2785IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2786{
2787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2788 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2789#else
2790 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2791#endif
2792}
2793
2794
2795/**
2796 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2797 * address.
2798 */
2799IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2800{
2801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2802 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2803#else
2804 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2805#endif
2806}
2807
2808
2809/**
2810 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2811 */
2812IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2813{
2814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2815 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2816#else
2817 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2818#endif
2819}
2820
2821
2822/**
2823 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2824 */
2825IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2826{
2827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2828 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2829#else
2830 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2831#endif
2832}
2833
2834
2835/**
2836 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2839{
2840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2841 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2842#else
2843 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2844#endif
2845}
2846
2847
2848/**
2849 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2850 */
2851IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2852{
2853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2854 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2855#else
2856 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2857#endif
2858}
2859
2860
2861/**
2862 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2863 */
2864IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2865{
2866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2867 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2868#else
2869 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2870#endif
2871}
2872
2873
2874/**
2875 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2876 * address.
2877 */
2878IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2879{
2880#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2881 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2882#else
2883 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2884#endif
2885}
2886
2887
2888/**
2889 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2890 */
2891IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2892{
2893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2894 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2895#else
2896 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2897#endif
2898}
2899
2900
2901/**
2902 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2903 */
2904IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2905{
2906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2907 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2908#else
2909 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2910#endif
2911}
2912
2913
2914/**
2915 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2916 */
2917IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2918{
2919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2920 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2921#else
2922 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2923#endif
2924}
2925
2926
2927/*********************************************************************************************************************************
2928* Helpers: Commit, rollback & unmap *
2929*********************************************************************************************************************************/
2930
2931/**
2932 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2933 */
2934IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2935{
2936 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2937}
2938
2939
2940/**
2941 * Used by TB code to commit and unmap a read-write memory mapping.
2942 */
2943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2944{
2945 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2946}
2947
2948
2949/**
2950 * Used by TB code to commit and unmap a write-only memory mapping.
2951 */
2952IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2953{
2954 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2955}
2956
2957
2958/**
2959 * Used by TB code to commit and unmap a read-only memory mapping.
2960 */
2961IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2962{
2963 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2964}
2965
2966
2967/**
2968 * Reinitializes the native recompiler state.
2969 *
2970 * Called before starting a new recompile job.
2971 */
2972static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2973{
2974 pReNative->cLabels = 0;
2975 pReNative->bmLabelTypes = 0;
2976 pReNative->cFixups = 0;
2977#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2978 pReNative->pDbgInfo->cEntries = 0;
2979#endif
2980 pReNative->pTbOrg = pTb;
2981 pReNative->cCondDepth = 0;
2982 pReNative->uCondSeqNo = 0;
2983 pReNative->uCheckIrqSeqNo = 0;
2984 pReNative->uTlbSeqNo = 0;
2985
2986#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2987 pReNative->Core.offPc = 0;
2988 pReNative->Core.cInstrPcUpdateSkipped = 0;
2989#endif
2990#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2991 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2992#endif
2993 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2994#if IEMNATIVE_HST_GREG_COUNT < 32
2995 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2996#endif
2997 ;
2998 pReNative->Core.bmHstRegsWithGstShadow = 0;
2999 pReNative->Core.bmGstRegShadows = 0;
3000 pReNative->Core.bmVars = 0;
3001 pReNative->Core.bmStack = 0;
3002 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3003 pReNative->Core.u64ArgVars = UINT64_MAX;
3004
3005 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3006 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3007 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3008 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3009 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3010 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3011 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3012 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3013 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3014 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3015 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3016 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3017 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3018 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3019 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3020 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3021 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3022
3023 /* Full host register reinit: */
3024 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3025 {
3026 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3027 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3028 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3029 }
3030
3031 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3032 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3033#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3034 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3035#endif
3036#ifdef IEMNATIVE_REG_FIXED_TMP0
3037 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3038#endif
3039#ifdef IEMNATIVE_REG_FIXED_TMP1
3040 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3041#endif
3042#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3043 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3044#endif
3045 );
3046 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3047 {
3048 fRegs &= ~RT_BIT_32(idxReg);
3049 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3050 }
3051
3052 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3053#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3054 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3055#endif
3056#ifdef IEMNATIVE_REG_FIXED_TMP0
3057 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3058#endif
3059#ifdef IEMNATIVE_REG_FIXED_TMP1
3060 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3061#endif
3062#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3063 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3064#endif
3065
3066#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3067# ifdef RT_ARCH_ARM64
3068 /*
3069 * Arm64 has only 32 128-bit registers; in order to support emulating 256-bit registers we statically
3070 * pair two real registers to one virtual register for now, leaving us with only 16 256-bit registers.
3071 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
3072 * and the register allocator assumes it will always be free when the lower one is picked.
3073 */
3074 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
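/* 0xaaaaaaaa has every odd-numbered bit set (1, 3, 5, ..., 31), i.e. it marks
   v1, v3, v5, ... (the upper register of each pair) as additionally fixed. */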
3075# else
3076 uint32_t const fFixedAdditional = 0;
3077# endif
3078
3079 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3080 | fFixedAdditional
3081# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3082 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3083# endif
3084 ;
3085 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3086 pReNative->Core.bmGstSimdRegShadows = 0;
3087 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3088 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3089
3090 /* Full host register reinit: */
3091 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3092 {
3093 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3094 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3095 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3096 }
3097
3098 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3099 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3100 {
3101 fRegs &= ~RT_BIT_32(idxReg);
3102 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3103 }
3104
3105#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3106 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3107#endif
3108
3109#endif
3110
3111 return pReNative;
3112}
3113
3114
3115/**
3116 * Allocates and initializes the native recompiler state.
3117 *
3118 * This is called the first time an EMT wants to recompile something.
3119 *
3120 * @returns Pointer to the new recompiler state.
3121 * @param pVCpu The cross context virtual CPU structure of the calling
3122 * thread.
3123 * @param pTb The TB that's about to be recompiled.
3124 * @thread EMT(pVCpu)
3125 */
3126static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3127{
3128 VMCPU_ASSERT_EMT(pVCpu);
3129
3130 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3131 AssertReturn(pReNative, NULL);
3132
3133 /*
3134 * Try to allocate all the buffers and stuff we need.
3135 */
3136 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3137 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3138 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3139#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3140 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3141#endif
3142 if (RT_LIKELY( pReNative->pInstrBuf
3143 && pReNative->paLabels
3144 && pReNative->paFixups)
3145#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3146 && pReNative->pDbgInfo
3147#endif
3148 )
3149 {
3150 /*
3151 * Set the buffer & array sizes on success.
3152 */
3153 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3154 pReNative->cLabelsAlloc = _8K;
3155 pReNative->cFixupsAlloc = _16K;
3156#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3157 pReNative->cDbgInfoAlloc = _16K;
3158#endif
3159
3160 /* Other constant stuff: */
3161 pReNative->pVCpu = pVCpu;
3162
3163 /*
3164 * Done, just need to save it and reinit it.
3165 */
3166 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3167 return iemNativeReInit(pReNative, pTb);
3168 }
3169
3170 /*
3171 * Failed. Cleanup and return.
3172 */
3173 AssertFailed();
3174 RTMemFree(pReNative->pInstrBuf);
3175 RTMemFree(pReNative->paLabels);
3176 RTMemFree(pReNative->paFixups);
3177#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3178 RTMemFree(pReNative->pDbgInfo);
3179#endif
3180 RTMemFree(pReNative);
3181 return NULL;
3182}
3183
3184
3185/**
3186 * Creates a label.
3187 *
3188 * If the label does not yet have a defined position,
3189 * call iemNativeLabelDefine() later to set it.
3190 *
3191 * @returns Label ID. Throws VBox status code on failure, so no need to check
3192 * the return value.
3193 * @param pReNative The native recompile state.
3194 * @param enmType The label type.
3195 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3196 * label is not yet defined (default).
3197 * @param uData Data associated with the label. Only applicable to
3198 * certain types of labels. Default is zero.
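*
* Illustrative usage sketch for a forward branch; the branch emitter, fixup type
* and label type names used here are placeholders, not actual identifiers:
* @code
*     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType); // offWhere=UINT32_MAX: not defined yet
*     off = iemNativeEmitSomeBranch(pReNative, off);                               // placeholder branch emitter
*     iemNativeAddFixup(pReNative, offBranchInstr, idxLabel, enmSomeFixupType);    // patch the branch during finalization
*     ...
*     iemNativeLabelDefine(pReNative, idxLabel, off);                              // resolve the forward declaration
* @endcode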
3199 */
3200DECL_HIDDEN_THROW(uint32_t)
3201iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3202 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3203{
3204 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3205
3206 /*
3207 * Locate existing label definition.
3208 *
3209 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3210 * and uData is zero.
3211 */
3212 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3213 uint32_t const cLabels = pReNative->cLabels;
3214 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3215#ifndef VBOX_STRICT
3216 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3217 && offWhere == UINT32_MAX
3218 && uData == 0
3219#endif
3220 )
3221 {
3222#ifndef VBOX_STRICT
3223 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3224 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3225 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3226 if (idxLabel < pReNative->cLabels)
3227 return idxLabel;
3228#else
3229 for (uint32_t i = 0; i < cLabels; i++)
3230 if ( paLabels[i].enmType == enmType
3231 && paLabels[i].uData == uData)
3232 {
3233 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3234 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3235 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3236 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3237 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3238 return i;
3239 }
3240 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3241 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3242#endif
3243 }
3244
3245 /*
3246 * Make sure we've got room for another label.
3247 */
3248 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3249 { /* likely */ }
3250 else
3251 {
3252 uint32_t cNew = pReNative->cLabelsAlloc;
3253 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3254 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3255 cNew *= 2;
3256 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* The IEMNATIVEFIXUP::idxLabel type restricts this. */
3257 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3258 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3259 pReNative->paLabels = paLabels;
3260 pReNative->cLabelsAlloc = cNew;
3261 }
3262
3263 /*
3264 * Define a new label.
3265 */
3266 paLabels[cLabels].off = offWhere;
3267 paLabels[cLabels].enmType = enmType;
3268 paLabels[cLabels].uData = uData;
3269 pReNative->cLabels = cLabels + 1;
3270
3271 Assert((unsigned)enmType < 64);
3272 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3273
3274 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3275 {
3276 Assert(uData == 0);
3277 pReNative->aidxUniqueLabels[enmType] = cLabels;
3278 }
3279
3280 if (offWhere != UINT32_MAX)
3281 {
3282#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3283 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3284 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3285#endif
3286 }
3287 return cLabels;
3288}
3289
3290
3291/**
3292 * Defines the location of an existing label.
3293 *
3294 * @param pReNative The native recompile state.
3295 * @param idxLabel The label to define.
3296 * @param offWhere The position.
3297 */
3298DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3299{
3300 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3301 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3302 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3303 pLabel->off = offWhere;
3304#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3305 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3306 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3307#endif
3308}
3309
3310
3311/**
3312 * Looks up a label.
3313 *
3314 * @returns Label ID if found, UINT32_MAX if not.
3315 */
3316static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3317 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3318{
3319 Assert((unsigned)enmType < 64);
3320 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3321 {
3322 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3323 return pReNative->aidxUniqueLabels[enmType];
3324
3325 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3326 uint32_t const cLabels = pReNative->cLabels;
3327 for (uint32_t i = 0; i < cLabels; i++)
3328 if ( paLabels[i].enmType == enmType
3329 && paLabels[i].uData == uData
3330 && ( paLabels[i].off == offWhere
3331 || offWhere == UINT32_MAX
3332 || paLabels[i].off == UINT32_MAX))
3333 return i;
3334 }
3335 return UINT32_MAX;
3336}
3337
3338
3339/**
3340 * Adds a fixup.
3341 *
3342 * @throws VBox status code (int) on failure.
3343 * @param pReNative The native recompile state.
3344 * @param offWhere The instruction offset of the fixup location.
3345 * @param idxLabel The target label ID for the fixup.
3346 * @param enmType The fixup type.
3347 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3348 */
3349DECL_HIDDEN_THROW(void)
3350iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3351 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3352{
3353 Assert(idxLabel <= UINT16_MAX);
3354 Assert((unsigned)enmType <= UINT8_MAX);
3355#ifdef RT_ARCH_ARM64
3356 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3357 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3358 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3359#endif
3360
3361 /*
3362 * Make sure we've room.
3363 */
3364 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3365 uint32_t const cFixups = pReNative->cFixups;
3366 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3367 { /* likely */ }
3368 else
3369 {
3370 uint32_t cNew = pReNative->cFixupsAlloc;
3371 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3372 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3373 cNew *= 2;
3374 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3375 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3376 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3377 pReNative->paFixups = paFixups;
3378 pReNative->cFixupsAlloc = cNew;
3379 }
3380
3381 /*
3382 * Add the fixup.
3383 */
3384 paFixups[cFixups].off = offWhere;
3385 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3386 paFixups[cFixups].enmType = enmType;
3387 paFixups[cFixups].offAddend = offAddend;
3388 pReNative->cFixups = cFixups + 1;
3389}
3390
3391
3392/**
3393 * Slow code path for iemNativeInstrBufEnsure.
3394 */
3395DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3396{
3397 /* Double the buffer size till we meet the request. */
3398 uint32_t cNew = pReNative->cInstrBufAlloc;
3399 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3400 do
3401 cNew *= 2;
3402 while (cNew < off + cInstrReq);
3403
3404 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3405#ifdef RT_ARCH_ARM64
3406 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3407#else
3408 uint32_t const cbMaxInstrBuf = _2M;
3409#endif
3410 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3411
3412 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3413 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3414
3415#ifdef VBOX_STRICT
3416 pReNative->offInstrBufChecked = off + cInstrReq;
3417#endif
3418 pReNative->cInstrBufAlloc = cNew;
3419 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3420}
3421
3422#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3423
3424/**
3425 * Grows the static debug info array used during recompilation.
3426 *
3427 * @returns Pointer to the new debug info block; throws VBox status code on
3428 * failure, so no need to check the return value.
3429 */
3430DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3431{
3432 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3433 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3434 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3435 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3436 pReNative->pDbgInfo = pDbgInfo;
3437 pReNative->cDbgInfoAlloc = cNew;
3438 return pDbgInfo;
3439}
3440
3441
3442/**
3443 * Adds a new debug info uninitialized entry, returning the pointer to it.
3444 */
3445DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3446{
3447 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3448 { /* likely */ }
3449 else
3450 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3451 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3452}
3453
3454
3455/**
3456 * Debug Info: Adds a native offset record, if necessary.
3457 */
3458DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3459{
3460 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3461
3462 /*
3463 * Search backwards to see if we've got a similar record already.
3464 */
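/* Only the last 8 entries are scanned as a cheap limit; native offsets are expected
   to be ascending (see the assertion below). */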
3465 uint32_t idx = pDbgInfo->cEntries;
3466 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3467 while (idx-- > idxStop)
3468 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3469 {
3470 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3471 return;
3472 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3473 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3474 break;
3475 }
3476
3477 /*
3478 * Add it.
3479 */
3480 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3481 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3482 pEntry->NativeOffset.offNative = off;
3483}
3484
3485
3486/**
3487 * Debug Info: Record info about a label.
3488 */
3489static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3490{
3491 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3492 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3493 pEntry->Label.uUnused = 0;
3494 pEntry->Label.enmLabel = (uint8_t)enmType;
3495 pEntry->Label.uData = uData;
3496}
3497
3498
3499/**
3500 * Debug Info: Record info about a threaded call.
3501 */
3502static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3503{
3504 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3505 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3506 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3507 pEntry->ThreadedCall.uUnused = 0;
3508 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3509}
3510
3511
3512/**
3513 * Debug Info: Record info about a new guest instruction.
3514 */
3515static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3516{
3517 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3518 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3519 pEntry->GuestInstruction.uUnused = 0;
3520 pEntry->GuestInstruction.fExec = fExec;
3521}
3522
3523
3524/**
3525 * Debug Info: Record info about guest register shadowing.
3526 */
3527DECL_HIDDEN_THROW(void)
3528iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3529 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3530{
3531 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3532 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3533 pEntry->GuestRegShadowing.uUnused = 0;
3534 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3535 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3536 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3537}
3538
3539
3540# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3541/**
3542 * Debug Info: Record info about guest SIMD register shadowing.
3543 */
3544DECL_HIDDEN_THROW(void)
3545iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3546 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3547{
3548 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3549 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3550 pEntry->GuestSimdRegShadowing.uUnused = 0;
3551 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3552 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3553 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3554}
3555# endif
3556
3557
3558# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3559/**
3560 * Debug Info: Record info about delayed RIP updates.
3561 */
3562DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3563{
3564 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3565 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3566 pEntry->DelayedPcUpdate.offPc = offPc;
3567 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3568}
3569# endif
3570
3571#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3572
3573
3574/*********************************************************************************************************************************
3575* Register Allocator *
3576*********************************************************************************************************************************/
3577
3578/**
3579 * Register parameter indexes (indexed by argument number).
3580 */
3581DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3582{
3583 IEMNATIVE_CALL_ARG0_GREG,
3584 IEMNATIVE_CALL_ARG1_GREG,
3585 IEMNATIVE_CALL_ARG2_GREG,
3586 IEMNATIVE_CALL_ARG3_GREG,
3587#if defined(IEMNATIVE_CALL_ARG4_GREG)
3588 IEMNATIVE_CALL_ARG4_GREG,
3589# if defined(IEMNATIVE_CALL_ARG5_GREG)
3590 IEMNATIVE_CALL_ARG5_GREG,
3591# if defined(IEMNATIVE_CALL_ARG6_GREG)
3592 IEMNATIVE_CALL_ARG6_GREG,
3593# if defined(IEMNATIVE_CALL_ARG7_GREG)
3594 IEMNATIVE_CALL_ARG7_GREG,
3595# endif
3596# endif
3597# endif
3598#endif
3599};
3600AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3601
3602/**
3603 * Call register masks indexed by argument count.
3604 */
3605DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3606{
3607 0,
3608 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3609 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3610 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3611 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3612 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3613#if defined(IEMNATIVE_CALL_ARG4_GREG)
3614 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3615 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3616# if defined(IEMNATIVE_CALL_ARG5_GREG)
3617 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3618 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3619# if defined(IEMNATIVE_CALL_ARG6_GREG)
3620 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3621 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3622 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3623# if defined(IEMNATIVE_CALL_ARG7_GREG)
3624 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3625 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3626 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3627# endif
3628# endif
3629# endif
3630#endif
3631};
3632
3633#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3634/**
3635 * BP offset of the stack argument slots.
3636 *
3637 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3638 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3639 */
3640DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3641{
3642 IEMNATIVE_FP_OFF_STACK_ARG0,
3643# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3644 IEMNATIVE_FP_OFF_STACK_ARG1,
3645# endif
3646# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3647 IEMNATIVE_FP_OFF_STACK_ARG2,
3648# endif
3649# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3650 IEMNATIVE_FP_OFF_STACK_ARG3,
3651# endif
3652};
3653AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3654#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3655
3656/**
3657 * Info about shadowed guest register values.
3658 * @see IEMNATIVEGSTREG
3659 */
3660DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3661{
3662#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3663 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3664 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3665 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3666 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3667 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3668 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3669 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3670 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3671 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3672 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3673 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3674 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3675 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3676 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3677 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3678 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3679 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3680 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3681 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3682 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3683 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3684 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3685 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3686 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3687 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3688 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3689 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3690 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3691 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3692 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3693 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3694 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3695 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3696 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3697 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3698 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3699 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3700 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3701 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3702 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3703 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3704 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3705 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3706 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3707 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3708 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3709 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3710 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3711#undef CPUMCTX_OFF_AND_SIZE
3712};
3713AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3714
3715
3716/** Host CPU general purpose register names. */
3717DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3718{
3719#ifdef RT_ARCH_AMD64
3720 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3721#elif defined(RT_ARCH_ARM64)
3722 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3723 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3724#else
3725# error "port me"
3726#endif
3727};
3728
3729
3730#if 0 /* unused */
3731/**
3732 * Tries to locate a suitable register in the given register mask.
3733 *
3734 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3735 * failed.
3736 *
3737 * @returns Host register number on success, returns UINT8_MAX on failure.
3738 */
3739static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3740{
3741 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3742 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3743 if (fRegs)
3744 {
3745 /** @todo pick better here: */
3746 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3747
3748 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3749 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3750 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3751 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3752
3753 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3754 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3755 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3756 return idxReg;
3757 }
3758 return UINT8_MAX;
3759}
3760#endif /* unused */
3761
3762
3763/**
3764 * Locate a register, possibly freeing one up.
3765 *
3766 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3767 * failed.
3768 *
3769 * @returns Host register number on success. Returns UINT8_MAX if no registers
3770 * are found; the caller is supposed to deal with this and raise an
3771 * allocation type specific status code (if desired).
3772 *
3773 * @throws VBox status code if we run into trouble spilling a variable or
3774 * recording debug info. Does NOT throw anything if we're out of
3775 * registers, though.
3776 */
3777static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3778 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3779{
3780 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3781 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3782 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3783
3784 /*
3785 * Try a freed register that's shadowing a guest register.
3786 */
3787 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3788 if (fRegs)
3789 {
3790 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3791
3792#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3793 /*
3794 * When we have liveness information, we use it to kick out all shadowed
3795 * guest registers that will not be needed any more in this TB. If we're
3796 * lucky, this may prevent us from ending up here again.
3797 *
3798 * Note! We must consider the previous entry here so we don't free
3799 * anything that the current threaded function requires (current
3800 * entry is produced by the next threaded function).
3801 */
3802 uint32_t const idxCurCall = pReNative->idxCurCall;
3803 if (idxCurCall > 0)
3804 {
3805 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3806
3807# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3808 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3809 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3810 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3811# else
3812 /* Construct a mask of the registers not in the read or write state.
3813 Note! We could skip writes if they aren't from us, as this is just
3814 a hack to prevent trashing registers that have just been written
3815 or will be written when we retire the current instruction. */
3816 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3817 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3818 & IEMLIVENESSBIT_MASK;
3819# endif
3820 /* Merge EFLAGS. */
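/* The liveness state tracks EFLAGS as seven separate parts (Other, CF, PF, AF, ZF, SF, OF)
   occupying kIemNativeGstReg_EFlags and the six bit positions above it. The three shift+AND
   steps below fold all seven into the single kIemNativeGstReg_EFlags bit, so the EFLAGS
   shadow is only freed when every part is unneeded. */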
3821 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3822 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3823 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3824 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3825 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3826
3827 /* If it matches any shadowed registers. */
3828 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3829 {
3830 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3831 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3832 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3833
3834 /* See if we've got any unshadowed registers we can return now. */
3835 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3836 if (fUnshadowedRegs)
3837 {
3838 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3839 return (fPreferVolatile
3840 ? ASMBitFirstSetU32(fUnshadowedRegs)
3841 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3842 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3843 - 1;
3844 }
3845 }
3846 }
3847#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3848
3849 unsigned const idxReg = (fPreferVolatile
3850 ? ASMBitFirstSetU32(fRegs)
3851 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3852 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3853 - 1;
3854
3855 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3856 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3857 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3858 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3859
3860 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3861 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3862 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3863 return idxReg;
3864 }
3865
3866 /*
3867 * Try free up a variable that's in a register.
3868 *
3869 * We do two rounds here: first evacuating variables that don't need to be
3870 * saved on the stack, then in the second round moving things to the stack.
3871 */
3872 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3873 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3874 {
3875 uint32_t fVars = pReNative->Core.bmVars;
3876 while (fVars)
3877 {
3878 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3879 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3880 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3881 && (RT_BIT_32(idxReg) & fRegMask)
3882 && ( iLoop == 0
3883 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3884 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3885 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3886 {
3887 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3888 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3889 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3890 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3891 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3892 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3893
3894 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3895 {
3896 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3897 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3898 }
3899
3900 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3901 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3902
3903 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3904 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3905 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3906 return idxReg;
3907 }
3908 fVars &= ~RT_BIT_32(idxVar);
3909 }
3910 }
3911
3912 return UINT8_MAX;
3913}
3914
3915
3916/**
3917 * Reassigns a variable to a different register specified by the caller.
3918 *
3919 * @returns The new code buffer position.
3920 * @param pReNative The native recompile state.
3921 * @param off The current code buffer position.
3922 * @param idxVar The variable index.
3923 * @param idxRegOld The old host register number.
3924 * @param idxRegNew The new host register number.
3925 * @param pszCaller The caller for logging.
3926 */
3927static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3928 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3929{
3930 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3931 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3932 RT_NOREF(pszCaller);
3933
3934 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3935
3936 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3937 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3938 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3939 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3940
3941 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3942 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3943 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3944 if (fGstRegShadows)
3945 {
3946 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3947 | RT_BIT_32(idxRegNew);
3948 while (fGstRegShadows)
3949 {
3950 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3951 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3952
3953 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3954 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3955 }
3956 }
3957
3958 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3959 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3960 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3961 return off;
3962}
3963
3964
3965/**
3966 * Moves a variable to a different register or spills it onto the stack.
3967 *
3968 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3969 * kinds can easily be recreated if needed later.
3970 *
3971 * @returns The new code buffer position.
3972 * @param pReNative The native recompile state.
3973 * @param off The current code buffer position.
3974 * @param idxVar The variable index.
3975 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3976 * call-volatile registers.
3977 */
3978DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3979 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3980{
3981 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3982 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3983 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3984 Assert(!pVar->fRegAcquired);
3985
3986 uint8_t const idxRegOld = pVar->idxReg;
3987 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3988 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3989 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3990 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3991 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3992 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3993 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3994 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3995
3996
3997 /** @todo Add statistics on this.*/
3998 /** @todo Implement basic variable liveness analysis (python) so variables
3999 * can be freed immediately once no longer used. Without that we risk
4000 * trashing registers and stack space for dead variables.
4001 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4002
4003 /*
4004 * First try move it to a different register, as that's cheaper.
4005 */
4006 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4007 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4008 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4009 if (fRegs)
4010 {
4011 /* Avoid using shadow registers, if possible. */
4012 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4013 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4014 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4015 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4016 }
4017
4018 /*
4019 * Otherwise we must spill the register onto the stack.
4020 */
4021 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4022 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4023 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4024 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4025
4026 pVar->idxReg = UINT8_MAX;
4027 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4028 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4029 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4030 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4031 return off;
4032}
4033
4034
4035/**
4036 * Allocates a temporary host general purpose register.
4037 *
4038 * This may emit code to save register content onto the stack in order to free
4039 * up a register.
4040 *
4041 * @returns The host register number; throws VBox status code on failure,
4042 * so no need to check the return value.
4043 * @param pReNative The native recompile state.
4044 * @param poff Pointer to the variable with the code buffer position.
4045 * This will be update if we need to move a variable from
4046 * register to stack in order to satisfy the request.
4047 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4048 * registers (@c true, default) or the other way around
4049 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
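*
* Minimal usage sketch (illustrative only; the emitter call is a placeholder and
* the release helper name is an assumption):
* @code
*     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
*     off = iemNativeEmitSomethingWithReg(pReNative, off, idxTmpReg); // placeholder emitter
*     iemNativeRegFreeTmp(pReNative, idxTmpReg);                      // assumed release helper
* @endcode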
4050 */
4051DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4052{
4053 /*
4054 * Try find a completely unused register, preferably a call-volatile one.
4055 */
4056 uint8_t idxReg;
4057 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4058 & ~pReNative->Core.bmHstRegsWithGstShadow
4059 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4060 if (fRegs)
4061 {
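/* Pick from the preferred class (call-volatile vs non-volatile) when possible,
   otherwise fall back to any free register. */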
4062 if (fPreferVolatile)
4063 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4064 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4065 else
4066 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4067 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4068 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4069 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4070 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4071 }
4072 else
4073 {
4074 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4075 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4076 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4077 }
4078 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4079}
4080
4081
4082/**
4083 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4084 * registers.
4085 *
4086 * @returns The host register number; throws VBox status code on failure,
4087 * so no need to check the return value.
4088 * @param pReNative The native recompile state.
4089 * @param poff Pointer to the variable with the code buffer position.
4090 * This will be updated if we need to move a variable from
4091 * register to stack in order to satisfy the request.
4092 * @param fRegMask Mask of acceptable registers.
4093 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4094 * registers (@c true, default) or the other way around
4095 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4096 */
4097DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4098 bool fPreferVolatile /*= true*/)
4099{
4100 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4101 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4102
4103 /*
4104 * Try find a completely unused register, preferably a call-volatile one.
4105 */
4106 uint8_t idxReg;
4107 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4108 & ~pReNative->Core.bmHstRegsWithGstShadow
4109 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4110 & fRegMask;
4111 if (fRegs)
4112 {
4113 if (fPreferVolatile)
4114 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4115 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4116 else
4117 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4118 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4119 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4120 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4121 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4122 }
4123 else
4124 {
4125 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4126 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4127 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4128 }
4129 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4130}
4131
4132
4133/**
4134 * Allocates a temporary register for loading an immediate value into.
4135 *
4136 * This will emit code to load the immediate, unless there happens to be an
4137 * unused register with the value already loaded.
4138 *
4139 * The caller must not modify the returned register; it must be considered
4140 * read-only. Free using iemNativeRegFreeTmpImm.
4141 *
4142 * @returns The host register number; throws VBox status code on failure, so no
4143 * need to check the return value.
4144 * @param pReNative The native recompile state.
4145 * @param poff Pointer to the variable with the code buffer position.
4146 * @param uImm The immediate value that the register must hold upon
4147 * return.
4148 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4149 * registers (@c true, default) or the other way around
4150 * (@c false).
4151 *
4152 * @note Reusing immediate values has not been implemented yet.
4153 */
4154DECL_HIDDEN_THROW(uint8_t)
4155iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4156{
4157 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4158 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4159 return idxReg;
4160}
4161
4162
4163/**
4164 * Allocates a temporary host general purpose register for keeping a guest
4165 * register value.
4166 *
4167 * We may already have a register holding the guest register value; if not,
4168 * code will be emitted to load it. Code may also
4169 * be emitted if we have to free up a register to satisfy the request.
4170 *
4171 * @returns The host register number; throws VBox status code on failure, so no
4172 * need to check the return value.
4173 * @param pReNative The native recompile state.
4174 * @param poff Pointer to the variable with the code buffer
4175 * position. This will be updated if we need to move a
4176 * variable from register to stack in order to satisfy
4177 * the request.
4178 * @param enmGstReg The guest register that is to be updated.
4179 * @param enmIntendedUse How the caller will be using the host register.
4180 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4181 * register is okay (default). The ASSUMPTION here is
4182 * that the caller has already flushed all volatile
4183 * registers, so this is only applied if we allocate a
4184 * new register.
4185 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4186 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
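*
* Illustrative sketch (the modifying emitter is a placeholder name):
* @code
*     uint8_t const idxGprReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst + X86_GREG_xAX,
*                                                               kIemNativeGstRegUse_ForUpdate);
*     off = iemNativeEmitModifyReg(pReNative, off, idxGprReg); // placeholder; the host register shadows guest RAX here
* @endcode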
4187 */
4188DECL_HIDDEN_THROW(uint8_t)
4189iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4190 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4191 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4192{
4193 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4194#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4195 AssertMsg( fSkipLivenessAssert
4196 || pReNative->idxCurCall == 0
4197 || enmGstReg == kIemNativeGstReg_Pc
4198 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4199 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4200 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4201 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4202 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4203 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4204#endif
4205 RT_NOREF(fSkipLivenessAssert);
4206#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4207 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4208#endif
4209 uint32_t const fRegMask = !fNoVolatileRegs
4210 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4211 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4212
4213 /*
4214 * First check if the guest register value is already in a host register.
4215 */
4216 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4217 {
4218 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4219 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4220 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4221 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4222
4223 /* It's not supposed to be allocated... */
4224 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4225 {
4226 /*
4227 * If the register will trash the guest shadow copy, try find a
4228 * completely unused register we can use instead. If that fails,
4229 * we need to disassociate the host reg from the guest reg.
4230 */
4231 /** @todo would be nice to know if preserving the register is in any way helpful. */
4232 /* If the purpose is calculations, try to duplicate the register value as
4233 we'll be clobbering the shadow. */
4234 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4235 && ( ~pReNative->Core.bmHstRegs
4236 & ~pReNative->Core.bmHstRegsWithGstShadow
4237 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4238 {
4239 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4240
4241 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4242
4243 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4244 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4245 g_apszIemNativeHstRegNames[idxRegNew]));
4246 idxReg = idxRegNew;
4247 }
4248 /* If the current register matches the restrictions, go ahead and allocate
4249 it for the caller. */
4250 else if (fRegMask & RT_BIT_32(idxReg))
4251 {
4252 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4253 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4254 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4255 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4256 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4257 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4258 else
4259 {
4260 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4261 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4262 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4263 }
4264 }
4265 /* Otherwise, allocate a register that satisfies the caller and transfer
4266 the shadowing if compatible with the intended use. (This basically
4267 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4268 else
4269 {
4270 Assert(fNoVolatileRegs);
4271 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4272 !fNoVolatileRegs
4273 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4274 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4275 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4276 {
4277 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4278 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4279 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4280 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4281 }
4282 else
4283 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4284 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4285 g_apszIemNativeHstRegNames[idxRegNew]));
4286 idxReg = idxRegNew;
4287 }
4288 }
4289 else
4290 {
4291 /*
4292 * Oops. Shadowed guest register already allocated!
4293 *
4294 * Allocate a new register, copy the value and, if updating, the
4295 * guest shadow copy assignment to the new register.
4296 */
4297 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4298 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4299 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4300 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4301
4302 /** @todo share register for readonly access. */
4303 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4304 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4305
4306 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4307 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4308
4309 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4310 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4311 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4312 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4313 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4314 else
4315 {
4316 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4317 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4318 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4319 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4320 }
4321 idxReg = idxRegNew;
4322 }
4323 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4324
4325#ifdef VBOX_STRICT
4326 /* Strict builds: Check that the value is correct. */
4327 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4328#endif
4329
4330 return idxReg;
4331 }
4332
4333 /*
4334 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
 */
4335 */
4336 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4337
4338 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4339 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4340
4341 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4342 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4343 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4344 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4345
4346 return idxRegNew;
4347}
4348
4349
4350/**
4351 * Allocates a temporary host general purpose register that already holds the
4352 * given guest register value.
4353 *
4354 * The use case for this function is places where the shadowing state cannot be
4355 * modified due to branching and such. This will fail if we don't have a
4356 * current shadow copy handy or if it's incompatible. The only code that will
4357 * be emitted here is value checking code in strict builds.
4358 *
4359 * The intended use can only be readonly!
4360 *
4361 * @returns The host register number, UINT8_MAX if not present.
4362 * @param pReNative The native recompile state.
4363 * @param poff Pointer to the instruction buffer offset.
4364 * Will be updated in strict builds if a register is
4365 * found.
4366 * @param enmGstReg The guest register that will is to be updated.
4367 * @note In strict builds, this may throw instruction buffer growth failures.
4368 * Non-strict builds will not throw anything.
4369 * @sa iemNativeRegAllocTmpForGuestReg
4370 */
4371DECL_HIDDEN_THROW(uint8_t)
4372iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4373{
4374 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4375#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4376 AssertMsg( pReNative->idxCurCall == 0
4377 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4378 || enmGstReg == kIemNativeGstReg_Pc,
4379 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4380#endif
4381
4382 /*
4383 * First check if the guest register value is already in a host register.
4384 */
4385 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4386 {
4387 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4388 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4389 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4390 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4391
4392 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4393 {
4394 /*
4395 * We only do readonly use here, so easy compared to the other
4396 * variant of this code.
4397 */
4398 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4399 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4400 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4401 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4402 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4403
4404#ifdef VBOX_STRICT
4405 /* Strict builds: Check that the value is correct. */
4406 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4407#else
4408 RT_NOREF(poff);
4409#endif
4410 return idxReg;
4411 }
4412 }
4413
4414 return UINT8_MAX;
4415}
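
/*
 * Minimal caller sketch (an assumed pattern, not code lifted from elsewhere in
 * this file): try the cheap read-only lookup first and only fall back to the
 * full allocator, which may emit load code, when no usable shadow copy exists.
 * The default read-only intended use of iemNativeRegAllocTmpForGuestReg() is
 * assumed here.
 *
 *      uint8_t idxTmpReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxTmpReg == UINT8_MAX)
 *          idxTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
 *      // ... read-only use of idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */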
4416
4417
4418/**
4419 * Allocates argument registers for a function call.
4420 *
4421 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4422 * need to check the return value.
4423 * @param pReNative The native recompile state.
4424 * @param off The current code buffer offset.
4425 * @param cArgs The number of arguments the function call takes.
4426 */
4427DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4428{
4429 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4430 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4431 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4432 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4433
4434 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4435 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4436 else if (cArgs == 0)
4437 return off;
4438
4439 /*
4440 * Are we in luck, with all the registers free and not shadowing anything?
4441 */
4442 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4443 for (uint32_t i = 0; i < cArgs; i++)
4444 {
4445 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4446 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4447 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4448 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4449 }
4450 /*
4451 * Okay, not lucky so we have to free up the registers.
4452 */
4453 else
4454 for (uint32_t i = 0; i < cArgs; i++)
4455 {
4456 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4457 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4458 {
4459 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4460 {
4461 case kIemNativeWhat_Var:
4462 {
4463 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4464 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4465 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4466 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4467 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4468
4469 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4470 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4471 else
4472 {
4473 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4474 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4475 }
4476 break;
4477 }
4478
4479 case kIemNativeWhat_Tmp:
4480 case kIemNativeWhat_Arg:
4481 case kIemNativeWhat_rc:
4482 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4483 default:
4484 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4485 }
4486
4487 }
4488 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4489 {
4490 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4491 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4492 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4493 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4494 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4495 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4496 }
4497 else
4498 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4499 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4500 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4501 }
4502 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4503 return off;
4504}
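
/*
 * Hedged usage sketch (assumed caller): reserve the register-carried arguments
 * of a three argument helper call up front, then emit the code that loads the
 * actual values into g_aidxIemNativeCallRegs[0..2].
 *
 *      off = iemNativeRegAllocArgs(pReNative, off, 3 /*cArgs*/);
 *      // ... emit the argument value loads and the call itself ...
 */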
4505
4506
4507DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4508
4509
4510#if 0
4511/**
4512 * Frees a register assignment of any type.
4513 *
4514 * @param pReNative The native recompile state.
4515 * @param idxHstReg The register to free.
4516 *
4517 * @note Does not update variables.
4518 */
4519DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4520{
4521 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4522 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4523 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4524 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4525 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4526 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4527 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4528 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4529 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4530 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4531 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4532 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4533 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4534 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4535
4536 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4537 /* no flushing, right:
4538 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4539 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4540 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4541 */
4542}
4543#endif
4544
4545
4546/**
4547 * Frees a temporary register.
4548 *
4549 * Any shadow copies of guest registers assigned to the host register will not
4550 * be flushed by this operation.
4551 */
4552DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4553{
4554 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4555 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4556 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4557 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4558 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4559}
4560
4561
4562/**
4563 * Frees a temporary immediate register.
4564 *
4565 * It is assumed that the caller has not modified the register, so it still holds
4566 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4567 */
4568DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4569{
4570 iemNativeRegFreeTmp(pReNative, idxHstReg);
4571}
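
/*
 * Pairing sketch (the allocator signature is an assumption; only its name is
 * referenced above): an immediate loaded into a temporary register is handed
 * back once the emitted code is done with it, leaving guest shadowing intact.
 *
 *      uint8_t const idxTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfff)); // assumed signature
 *      // ... emit code reading the constant from idxTmpImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxTmpImm);
 */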
4572
4573
4574/**
4575 * Frees a register assigned to a variable.
4576 *
4577 * The register will be disassociated from the variable.
4578 */
4579DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4580{
4581 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4582 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4583 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4584 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4585 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4586
4587 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4588 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4589 if (!fFlushShadows)
4590 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4591 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4592 else
4593 {
4594 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4595 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4596 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4597 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4598 uint64_t fGstRegShadows = fGstRegShadowsOld;
4599 while (fGstRegShadows)
4600 {
4601 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4602 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4603
4604 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4605 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4606 }
4607 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4608 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4609 }
4610}
4611
4612
4613/**
4614 * Called right before emitting a call instruction to move anything important
4615 * out of call-volatile registers, free and flush the call-volatile registers,
4616 * optionally freeing argument variables.
4617 *
4618 * @returns New code buffer offset; throws VBox status code on failure.
4619 * @param pReNative The native recompile state.
4620 * @param off The code buffer offset.
4621 * @param cArgs The number of arguments the function call takes.
4622 * It is presumed that the host register part of these has
4623 * been allocated as such already and won't need moving,
4624 * just freeing.
4625 * @param fKeepVars Mask of variables that should keep their register
4626 * assignments. Caller must take care to handle these.
4627 */
4628DECL_HIDDEN_THROW(uint32_t)
4629iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4630{
4631 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4632
4633 /* fKeepVars will reduce this mask. */
4634 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4635
4636 /*
4637 * Move anything important out of volatile registers.
4638 */
4639 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4640 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4641 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4642#ifdef IEMNATIVE_REG_FIXED_TMP0
4643 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4644#endif
4645#ifdef IEMNATIVE_REG_FIXED_TMP1
4646 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4647#endif
4648#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4649 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4650#endif
4651 & ~g_afIemNativeCallRegs[cArgs];
4652
4653 fRegsToMove &= pReNative->Core.bmHstRegs;
4654 if (!fRegsToMove)
4655 { /* likely */ }
4656 else
4657 {
4658 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4659 while (fRegsToMove != 0)
4660 {
4661 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4662 fRegsToMove &= ~RT_BIT_32(idxReg);
4663
4664 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4665 {
4666 case kIemNativeWhat_Var:
4667 {
4668 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4670 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4671 Assert(pVar->idxReg == idxReg);
4672 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4673 {
4674 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4675 idxVar, pVar->enmKind, pVar->idxReg));
4676 if (pVar->enmKind != kIemNativeVarKind_Stack)
4677 pVar->idxReg = UINT8_MAX;
4678 else
4679 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4680 }
4681 else
4682 fRegsToFree &= ~RT_BIT_32(idxReg);
4683 continue;
4684 }
4685
4686 case kIemNativeWhat_Arg:
4687 AssertMsgFailed(("What?!?: %u\n", idxReg));
4688 continue;
4689
4690 case kIemNativeWhat_rc:
4691 case kIemNativeWhat_Tmp:
4692 AssertMsgFailed(("Missing free: %u\n", idxReg));
4693 continue;
4694
4695 case kIemNativeWhat_FixedTmp:
4696 case kIemNativeWhat_pVCpuFixed:
4697 case kIemNativeWhat_pCtxFixed:
4698 case kIemNativeWhat_PcShadow:
4699 case kIemNativeWhat_FixedReserved:
4700 case kIemNativeWhat_Invalid:
4701 case kIemNativeWhat_End:
4702 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4703 }
4704 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4705 }
4706 }
4707
4708 /*
4709 * Do the actual freeing.
4710 */
4711 if (pReNative->Core.bmHstRegs & fRegsToFree)
4712 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4713 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4714 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4715
4716 /* If there are guest register shadows in any call-volatile register, we
4717 have to clear the corresponding guest register masks for each register. */
4718 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4719 if (fHstRegsWithGstShadow)
4720 {
4721 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4722 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4723 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4724 do
4725 {
4726 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4727 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4728
4729 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4730 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4731 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4732 } while (fHstRegsWithGstShadow != 0);
4733 }
4734
4735 return off;
4736}
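
/*
 * Hedged usage sketch derived from the fKeepVars description above: a caller
 * that needs the variable with (unpacked) index 2 to keep its host register
 * across the flush passes the corresponding bit and handles that register
 * manually afterwards.
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs, RT_BIT_32(2) /*fKeepVars*/);
 *      // ... emit the call; the register backing variable #2 is now the caller's responsibility ...
 */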
4737
4738
4739/**
4740 * Flushes a set of guest register shadow copies.
4741 *
4742 * This is usually done after calling a threaded function or a C-implementation
4743 * of an instruction.
4744 *
4745 * @param pReNative The native recompile state.
4746 * @param fGstRegs Set of guest registers to flush.
4747 */
4748DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4749{
4750 /*
4751 * Reduce the mask by what's currently shadowed
4752 */
4753 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4754 fGstRegs &= bmGstRegShadowsOld;
4755 if (fGstRegs)
4756 {
4757 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4758 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4759 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4760 if (bmGstRegShadowsNew)
4761 {
4762 /*
4763 * Partial.
4764 */
4765 do
4766 {
4767 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4768 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4769 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4770 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4771 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4772
4773 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4774 fGstRegs &= ~fInThisHstReg;
4775 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4776 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4777 if (!fGstRegShadowsNew)
4778 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4779 } while (fGstRegs != 0);
4780 }
4781 else
4782 {
4783 /*
4784 * Clear all.
4785 */
4786 do
4787 {
4788 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4789 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4790 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4791 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4792 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4793
4794 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4795 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4796 } while (fGstRegs != 0);
4797 pReNative->Core.bmHstRegsWithGstShadow = 0;
4798 }
4799 }
4800}
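
/*
 * Worked example of the bookkeeping above (register numbers invented for
 * illustration): assume host register 3 shadows guest registers 5 and 9, i.e.
 *
 *      aHstRegs[3].fGstRegShadows = RT_BIT_64(5) | RT_BIT_64(9);
 *      bmGstRegShadows            = RT_BIT_64(5) | RT_BIT_64(9);
 *      bmHstRegsWithGstShadow     = RT_BIT_32(3);
 *
 * Flushing fGstRegs = RT_BIT_64(5) takes the partial path: guest register 5 is
 * cleared from aHstRegs[3].fGstRegShadows and bmGstRegShadows, but since guest
 * register 9 is still shadowed by host register 3, RT_BIT_32(3) stays set in
 * bmHstRegsWithGstShadow.  Only when register 9 is flushed as well does host
 * register 3 drop out of bmHstRegsWithGstShadow.
 */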
4801
4802
4803/**
4804 * Flushes guest register shadow copies held by a set of host registers.
4805 *
4806 * This is used with the TLB lookup code for ensuring that we don't carry on
4807 * with any guest shadows in volatile registers, as these will get corrupted by
4808 * a TLB miss.
4809 *
4810 * @param pReNative The native recompile state.
4811 * @param fHstRegs Set of host registers to flush guest shadows for.
4812 */
4813DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4814{
4815 /*
4816 * Reduce the mask by what's currently shadowed.
4817 */
4818 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4819 fHstRegs &= bmHstRegsWithGstShadowOld;
4820 if (fHstRegs)
4821 {
4822 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4823 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4824 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4825 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4826 if (bmHstRegsWithGstShadowNew)
4827 {
4828 /*
4829 * Partial (likely).
4830 */
4831 uint64_t fGstShadows = 0;
4832 do
4833 {
4834 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4835 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4836 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4837 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4838
4839 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4840 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4841 fHstRegs &= ~RT_BIT_32(idxHstReg);
4842 } while (fHstRegs != 0);
4843 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4844 }
4845 else
4846 {
4847 /*
4848 * Clear all.
4849 */
4850 do
4851 {
4852 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4853 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4854 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4855 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4856
4857 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4858 fHstRegs &= ~RT_BIT_32(idxHstReg);
4859 } while (fHstRegs != 0);
4860 pReNative->Core.bmGstRegShadows = 0;
4861 }
4862 }
4863}
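
/*
 * Typical use per the doxygen above (a minimal sketch): drop all guest shadows
 * living in call-volatile host registers before entering the TLB lookup code,
 * so a TLB miss helper cannot leave stale shadow associations behind.
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */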
4864
4865
4866/**
4867 * Restores guest shadow copies in volatile registers.
4868 *
4869 * This is used after calling a helper function (think TLB miss) to restore the
4870 * register state of volatile registers.
4871 *
4872 * @param pReNative The native recompile state.
4873 * @param off The code buffer offset.
4874 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4875 * be active (allocated) w/o asserting. Hack.
4876 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4877 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4878 */
4879DECL_HIDDEN_THROW(uint32_t)
4880iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4881{
4882 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4883 if (fHstRegs)
4884 {
4885 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4886 do
4887 {
4888 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4889
4890 /* It's not fatal if a register is active holding a variable that
4891 shadows a guest register, ASSUMING all pending guest register
4892 writes were flushed prior to the helper call. However, we'll be
4893 emitting duplicate restores, so it wastes code space. */
4894 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4895 RT_NOREF(fHstRegsActiveShadows);
4896
4897 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4898 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4899 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4900 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4901
4902 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4903 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4904
4905 fHstRegs &= ~RT_BIT_32(idxHstReg);
4906 } while (fHstRegs != 0);
4907 }
4908 return off;
4909}
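
/*
 * Assumed TLB-miss code path sketch (following the @see note above; the
 * variable save/restore helper signatures are assumptions, only their names
 * appear in this file):
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);     // assumed signature
 *      // ... emit the TLB miss helper call ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave); // assumed signature
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
 */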
4910
4911
4912
4913
4914/*********************************************************************************************************************************
4915* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4916*********************************************************************************************************************************/
4917#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4918
4919/**
4920 * Info about shadowed guest SIMD register values.
4921 * @see IEMNATIVEGSTSIMDREG
4922 */
4923static struct
4924{
4925 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4926 uint32_t offXmm;
4927 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4928 uint32_t offYmm;
4929 /** Name (for logging). */
4930 const char *pszName;
4931} const g_aGstSimdShadowInfo[] =
4932{
4933#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4934 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4935 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4936 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4937 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4938 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4939 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4940 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4941 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4942 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4943 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4944 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4945 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4946 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4947 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4948 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4949 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4950 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4951#undef CPUMCTX_OFF_AND_SIZE
4952};
4953AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4954
4955
4956#ifdef LOG_ENABLED
4957/** Host CPU SIMD register names. */
4958DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4959{
4960#ifdef RT_ARCH_AMD64
4961 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4962#elif defined(RT_ARCH_ARM64)
4963 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4964 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4965#else
4966# error "port me"
4967#endif
4968};
4969#endif
4970
4971
4972/**
4973 * Frees a temporary SIMD register.
4974 *
4975 * Any shadow copies of guest registers assigned to the host register will not
4976 * be flushed by this operation.
4977 */
4978DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4979{
4980 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4981 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4982 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4983 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4984 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4985}
4986
4987
4988/**
4989 * Emits code to flush a pending write of the given SIMD register, if any; the dirty state of the register is cleared afterwards.
4990 *
4991 * @returns New code buffer offset.
4992 * @param pReNative The native recompile state.
4993 * @param off Current code buffer position.
4994 * @param enmGstSimdReg The guest SIMD register to flush.
4995 */
4996DECL_HIDDEN_THROW(uint32_t)
4997iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4998{
4999 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5000
5001 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5002 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5003 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5004 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5005
5006 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5007 {
5008 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5009 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5010 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5011 }
5012
5013 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5014 {
5015 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5016 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5017 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5018 }
5019
5020 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5021 return off;
5022}
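
/*
 * Minimal sketch (assumed caller): before handing control to code that reads
 * the guest context in memory, any dirty half of, say, ymm0 must be written
 * back first.
 *
 *      off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
 */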
5023
5024
5025/**
5026 * Locate a register, possibly freeing one up.
5027 *
5028 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5029 * failed.
5030 *
5031 * @returns Host register number on success. Returns UINT8_MAX if no registers
5032 * found; the caller is supposed to deal with this and raise an
5033 * allocation type specific status code (if desired).
5034 *
5035 * @throws VBox status code if we run into trouble spilling a variable or
5036 * recording debug info. Does NOT throw anything if we're out of
5037 * registers, though.
5038 */
5039static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5040 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5041{
5042 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5043 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5044 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5045
5046 /*
5047 * Try a freed register that's shadowing a guest register.
5048 */
5049 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5050 if (fRegs)
5051 {
5052 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5053
5054#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5055 /*
5056 * When we have liveness information, we use it to kick out all shadowed
5057 * guest registers that will not be needed any more in this TB. If we're
5058 * lucky, this may prevent us from ending up here again.
5059 *
5060 * Note! We must consider the previous entry here so we don't free
5061 * anything that the current threaded function requires (current
5062 * entry is produced by the next threaded function).
5063 */
5064 uint32_t const idxCurCall = pReNative->idxCurCall;
5065 if (idxCurCall > 0)
5066 {
5067 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5068
5069# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5070 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5071 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5072 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
5073#else
5074 /* Construct a mask of the registers not in the read or write state.
5075 Note! We could skip writes, if they aren't from us, as this is just
5076 a hack to prevent trashing registers that have just been written
5077 or will be written when we retire the current instruction. */
5078 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5079 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5080 & IEMLIVENESSBIT_MASK;
5081#endif
5082 /* If it matches any shadowed registers. */
5083 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5084 {
5085 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5086 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5087 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5088
5089 /* See if we've got any unshadowed registers we can return now. */
5090 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5091 if (fUnshadowedRegs)
5092 {
5093 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5094 return (fPreferVolatile
5095 ? ASMBitFirstSetU32(fUnshadowedRegs)
5096 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5097 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5098 - 1;
5099 }
5100 }
5101 }
5102#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5103
5104 unsigned const idxReg = (fPreferVolatile
5105 ? ASMBitFirstSetU32(fRegs)
5106 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5107 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5108 - 1;
5109
5110 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5111 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5112 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5113 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5114
5115 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5116 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5117 uint32_t idxGstSimdReg = 0;
5118 do
5119 {
5120 if (fGstRegShadows & 0x1)
5121 {
5122 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5123 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5124 }
5125 idxGstSimdReg++;
5126 fGstRegShadows >>= 1;
5127 } while (fGstRegShadows);
5128
5129 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5130 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5131 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5132 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5133 return idxReg;
5134 }
5135
5136 /*
5137 * Try free up a variable that's in a register.
5138 *
5139 * We do two rounds here, first evacuating variables we don't need to be
5140 * saved on the stack, then in the second round move things to the stack.
5141 */
5142 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5143 AssertReleaseFailed(); /** @todo No variable support right now. */
5144#if 0
5145 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5146 {
5147 uint32_t fVars = pReNative->Core.bmSimdVars;
5148 while (fVars)
5149 {
5150 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5151 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5152 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5153 && (RT_BIT_32(idxReg) & fRegMask)
5154 && ( iLoop == 0
5155 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5156 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5157 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5158 {
5159 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5160 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5161 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5162 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5163 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5164 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5165
5166 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5167 {
5168 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5169 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5170 }
5171
5172 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5173 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5174
5175 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5176 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5177 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5178 return idxReg;
5179 }
5180 fVars &= ~RT_BIT_32(idxVar);
5181 }
5182 }
5183#endif
5184
5185 AssertFailed();
5186 return UINT8_MAX;
5187}
5188
5189
5190/**
5191 * Flushes a set of guest SIMD register shadow copies.
5192 *
5193 * This is usually done after calling a threaded function or a C-implementation
5194 * of an instruction.
5195 *
5196 * @param pReNative The native recompile state.
5197 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5198 */
5199DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5200{
5201 /*
5202 * Reduce the mask by what's currently shadowed
5203 */
5204 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5205 fGstSimdRegs &= bmGstSimdRegShadows;
5206 if (fGstSimdRegs)
5207 {
5208 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5209 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5210 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5211 if (bmGstSimdRegShadowsNew)
5212 {
5213 /*
5214 * Partial.
5215 */
5216 do
5217 {
5218 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5219 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5220 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5221 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5222 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5223 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5224
5225 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5226 fGstSimdRegs &= ~fInThisHstReg;
5227 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5228 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5229 if (!fGstRegShadowsNew)
5230 {
5231 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5232 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5233 }
5234 } while (fGstSimdRegs != 0);
5235 }
5236 else
5237 {
5238 /*
5239 * Clear all.
5240 */
5241 do
5242 {
5243 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5244 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5245 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5246 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5247 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5248 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5249
5250 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5251 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5252 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5253 } while (fGstSimdRegs != 0);
5254 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5255 }
5256 }
5257}
5258
5259
5260/**
5261 * Allocates a temporary host SIMD register.
5262 *
5263 * This may emit code to save register content onto the stack in order to free
5264 * up a register.
5265 *
5266 * @returns The host register number; throws VBox status code on failure,
5267 * so no need to check the return value.
5268 * @param pReNative The native recompile state.
5269 * @param poff Pointer to the variable with the code buffer position.
5270 * This will be updated if we need to move a variable from
5271 * register to stack in order to satisfy the request.
5272 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5273 * registers (@c true, default) or the other way around
5274 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5275 */
5276DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5277{
5278 /*
5279 * Try find a completely unused register, preferably a call-volatile one.
5280 */
5281 uint8_t idxSimdReg;
5282 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5283 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5284 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5285 if (fRegs)
5286 {
5287 if (fPreferVolatile)
5288 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5289 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5290 else
5291 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5292 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5293 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5294 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5295 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5296 }
5297 else
5298 {
5299 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5300 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5301 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5302 }
5303
5304 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5305 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5306}
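
/*
 * Pairing sketch: as with the GPR variant, a temporary SIMD register is
 * returned to the allocator with iemNativeSimdRegFreeTmp() once the emitted
 * code no longer needs it (fPreferVolatile defaults to true).
 *
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit SIMD scratch work using idxSimdTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */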
5307
5308
5309/**
5310 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5311 * registers.
5312 *
5313 * @returns The host register number; throws VBox status code on failure,
5314 * so no need to check the return value.
5315 * @param pReNative The native recompile state.
5316 * @param poff Pointer to the variable with the code buffer position.
5317 * This will be updated if we need to move a variable from
5318 * register to stack in order to satisfy the request.
5319 * @param fRegMask Mask of acceptable registers.
5320 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5321 * registers (@c true, default) or the other way around
5322 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5323 */
5324DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5325 bool fPreferVolatile /*= true*/)
5326{
5327 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5328 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5329
5330 /*
5331 * Try find a completely unused register, preferably a call-volatile one.
5332 */
5333 uint8_t idxSimdReg;
5334 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5335 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5336 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5337 & fRegMask;
5338 if (fRegs)
5339 {
5340 if (fPreferVolatile)
5341 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5342 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5343 else
5344 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5345 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5346 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5347 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5348 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5349 }
5350 else
5351 {
5352 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5353 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5354 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5355 }
5356
5357 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5358 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5359}
5360
5361
5362/**
5363 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5364 *
5365 * @param pReNative The native recompile state.
5366 * @param idxHstSimdReg The host SIMD register to update the state for.
5367 * @param enmLoadSz The load size to set.
5368 */
5369DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5370 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5371{
5372 /* Everything valid already? -> nothing to do. */
5373 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5374 return;
5375
5376 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5377 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5378 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5379 {
5380 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5381 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5382 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5383 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5384 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5385 }
5386}
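
/*
 * Worked example of the load-state transitions implemented above (a pure
 * restatement, no new behaviour):
 *      Invalid  + any size  -> that size
 *      Low128   + High128   -> 256        (and High128 + Low128 -> 256)
 *      Low128   + Low128    -> Low128     (unchanged; likewise High128 + High128)
 *      256      + any size  -> 256        (already fully valid, early return)
 */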
5387
5388
5389static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5390 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5391{
5392 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5393 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5394 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5395 {
5396# ifdef RT_ARCH_ARM64
5397 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5398 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5399# endif
5400
5401 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5402 {
5403 switch (enmLoadSzDst)
5404 {
5405 case kIemNativeGstSimdRegLdStSz_256:
5406 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5407 break;
5408 case kIemNativeGstSimdRegLdStSz_Low128:
5409 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5410 break;
5411 case kIemNativeGstSimdRegLdStSz_High128:
5412 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5413 break;
5414 default:
5415 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5416 }
5417
5418 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5419 }
5420 }
5421 else
5422 {
5423 /* Complicated stuff where the source is currently missing something, later. */
5424 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5425 }
5426
5427 return off;
5428}
5429
5430
5431/**
5432 * Allocates a temporary host SIMD register for keeping a guest
5433 * SIMD register value.
5434 *
5435 * Since we may already have a register holding the guest register value,
5436 * code will be emitted to do the loading if that's not the case. Code may also
5437 * be emitted if we have to free up a register to satisfy the request.
5438 *
5439 * @returns The host register number; throws VBox status code on failure, so no
5440 * need to check the return value.
5441 * @param pReNative The native recompile state.
5442 * @param poff Pointer to the variable with the code buffer
5443 * position. This will be updated if we need to move a
5444 * variable from register to stack in order to satisfy
5445 * the request.
5446 * @param enmGstSimdReg The guest SIMD register that is to be accessed.
5447 * @param enmIntendedUse How the caller will be using the host register.
5448 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5449 * register is okay (default). The ASSUMPTION here is
5450 * that the caller has already flushed all volatile
5451 * registers, so this is only applied if we allocate a
5452 * new register.
5453 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5454 */
5455DECL_HIDDEN_THROW(uint8_t)
5456iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5457 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5458 bool fNoVolatileRegs /*= false*/)
5459{
5460 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5461#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5462 AssertMsg( pReNative->idxCurCall == 0
5463 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5464 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5465 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5466 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5467 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5468 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5469#endif
5470#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5471 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5472#endif
5473 uint32_t const fRegMask = !fNoVolatileRegs
5474 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5475 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5476
5477 /*
5478 * First check if the guest register value is already in a host register.
5479 */
5480 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5481 {
5482 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5483 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5484 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5485 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5486
5487 /* It's not supposed to be allocated... */
5488 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5489 {
5490 /*
5491 * If the register will trash the guest shadow copy, try find a
5492 * completely unused register we can use instead. If that fails,
5493 * we need to disassociate the host reg from the guest reg.
5494 */
5495 /** @todo would be nice to know if preserving the register is in any way helpful. */
5496 /* If the purpose is calculations, try duplicating the register value as
5497 we'll be clobbering the shadow. */
5498 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5499 && ( ~pReNative->Core.bmHstSimdRegs
5500 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5501 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5502 {
5503 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5504
5505 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5506
5507 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5508 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5509 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5510 idxSimdReg = idxRegNew;
5511 }
5512 /* If the current register matches the restrictions, go ahead and allocate
5513 it for the caller. */
5514 else if (fRegMask & RT_BIT_32(idxSimdReg))
5515 {
5516 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5517 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5518 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5519 {
5520 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5521 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5522 else
5523 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5524 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5525 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5526 }
5527 else
5528 {
5529 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5530 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5531 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5532 }
5533 }
5534 /* Otherwise, allocate a register that satisfies the caller and transfer
5535 the shadowing if compatible with the intended use. (This basically
5536 means the call wants a non-volatile register (RSP push/pop scenario).) */
5537 else
5538 {
5539 Assert(fNoVolatileRegs);
5540 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5541 !fNoVolatileRegs
5542 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5543 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5544 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5545 {
5546 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5547 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5548 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5549 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5550 }
5551 else
5552 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5553 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5554 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5555 idxSimdReg = idxRegNew;
5556 }
5557 }
5558 else
5559 {
5560 /*
5561 * Oops. Shadowed guest register already allocated!
5562 *
5563 * Allocate a new register, copy the value and, if updating, the
5564 * guest shadow copy assignment to the new register.
5565 */
5566 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5567 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5568 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5569 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5570
5571 /** @todo share register for readonly access. */
5572 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5573 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5574
5575 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5576 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5577 else
5578 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5579
5580 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5581 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5582 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5583 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5584 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5585 else
5586 {
5587 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5588 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5589 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5590 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5591 }
5592 idxSimdReg = idxRegNew;
5593 }
5594 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5595
5596#ifdef VBOX_STRICT
5597 /* Strict builds: Check that the value is correct. */
5598 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5599 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5600#endif
5601
5602 return idxSimdReg;
5603 }
5604
5605 /*
5606 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5607 */
5608 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5609
5610 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5611 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5612 else
5613 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5614
5615 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5616 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5617
5618 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5619 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5620
5621 return idxRegNew;
5622}
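
/*
 * Hedged usage sketch (assumed caller, defaults spelled out): fetch the low
 * 128 bits of guest ymm1 read-only, use them, then hand the register back.
 *
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                         IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ReadOnly);
 *      // ... read-only use of idxSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 */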
5623
5624#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5625
5626
5627
5628/*********************************************************************************************************************************
5629* Code emitters for flushing pending guest register writes and sanity checks *
5630*********************************************************************************************************************************/
5631
5632#ifdef VBOX_STRICT
5633/**
5634 * Does internal register allocator sanity checks.
5635 */
5636DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5637{
5638 /*
5639 * Iterate host registers building a guest shadowing set.
5640 */
5641 uint64_t bmGstRegShadows = 0;
5642 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5643 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5644 while (bmHstRegsWithGstShadow)
5645 {
5646 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5647 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5648 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5649
5650 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5651 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5652 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5653 bmGstRegShadows |= fThisGstRegShadows;
5654 while (fThisGstRegShadows)
5655 {
5656 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5657 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5658 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5659 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5660 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5661 }
5662 }
5663 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5664 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5665 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5666
5667 /*
5668 * Now the other way around, checking the guest to host index array.
5669 */
5670 bmHstRegsWithGstShadow = 0;
5671 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5672 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5673 while (bmGstRegShadows)
5674 {
5675 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5676 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5677 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5678
5679 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5680 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5681 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5682 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5683 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5684 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5685 }
5686 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5687 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5688 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5689}
5690#endif /* VBOX_STRICT */
5691
5692
5693/**
5694 * Flushes any delayed guest register writes.
5695 *
5696 * This must be called prior to calling CImpl functions and any helpers that use
5697 * the guest state (like raising exceptions) and such.
5698 *
5699 * The delayed writes currently handled here are the RIP update (when
5700 * IEMNATIVE_WITH_DELAYED_PC_UPDATING is defined) and dirty guest SIMD registers
     * (when IEMNATIVE_WITH_SIMD_REG_ALLOCATOR is defined).
5701 */
5702DECL_HIDDEN_THROW(uint32_t)
5703iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5704{
5705#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5706 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5707 off = iemNativeEmitPcWriteback(pReNative, off);
5708#else
5709 RT_NOREF(pReNative, fGstShwExcept);
5710#endif
5711
5712#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5713 /** @todo r=bird: There must be a quicker way to check if anything needs
5714     *        doing and then call the SIMD function to do the flushing. */
5715 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5716 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5717 {
5718 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5719 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5720
5721 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5722 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5723
5724 if ( fFlushShadows
5725 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5726 {
5727 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5728
5729 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5730 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5731 }
5732 }
5733#else
5734 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5735#endif
5736
5737 return off;
5738}
5739
5740
5741#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5742/**
5743 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5744 */
5745DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5746{
5747 Assert(pReNative->Core.offPc);
5748# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5749 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5750 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5751# endif
5752
5753# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5754 /* Allocate a temporary PC register. */
5755 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5756
5757 /* Perform the addition and store the result. */
5758 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5759 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5760
5761 /* Free but don't flush the PC register. */
5762 iemNativeRegFreeTmp(pReNative, idxPcReg);
5763# else
5764 /* Compare the shadow with the context value, they should match. */
5765 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5766 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5767# endif
5768
5769 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5770 pReNative->Core.offPc = 0;
5771 pReNative->Core.cInstrPcUpdateSkipped = 0;
5772
5773 return off;
5774}
5775#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5776
5777
5778/*********************************************************************************************************************************
5779* Code Emitters (larger snippets) *
5780*********************************************************************************************************************************/
5781
5782/**
5783 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5784 * extending to 64-bit width.
5785 *
5786 * @returns New code buffer offset on success, UINT32_MAX on failure.
5787 * @param   pReNative   The native recompile state.
5788 * @param off The current code buffer position.
5789 * @param idxHstReg The host register to load the guest register value into.
5790 * @param enmGstReg The guest register to load.
5791 *
5792 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5793 * that is something the caller needs to do if applicable.
5794 */
5795DECL_HIDDEN_THROW(uint32_t)
5796iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5797{
5798 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5799 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5800
5801 switch (g_aGstShadowInfo[enmGstReg].cb)
5802 {
5803 case sizeof(uint64_t):
5804 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5805 case sizeof(uint32_t):
5806 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5807 case sizeof(uint16_t):
5808 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5809#if 0 /* not present in the table. */
5810 case sizeof(uint8_t):
5811 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5812#endif
5813 default:
5814 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5815 }
5816}
5817
5818
5819#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5820/**
5821 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5822 *
5823 * @returns New code buffer offset on success, UINT32_MAX on failure.
5824 * @param pReNative The recompiler state.
5825 * @param off The current code buffer position.
5826 * @param idxHstSimdReg The host register to load the guest register value into.
5827 * @param enmGstSimdReg The guest register to load.
5828 * @param enmLoadSz The load size of the register.
5829 *
5830 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5831 * that is something the caller needs to do if applicable.
5832 */
5833DECL_HIDDEN_THROW(uint32_t)
5834iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5835 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5836{
5837 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5838
5839 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5840 switch (enmLoadSz)
5841 {
5842 case kIemNativeGstSimdRegLdStSz_256:
5843 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5844 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5845 case kIemNativeGstSimdRegLdStSz_Low128:
5846 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5847 case kIemNativeGstSimdRegLdStSz_High128:
5848 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5849 default:
5850 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5851 }
5852}
5853#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5854
5855#ifdef VBOX_STRICT
5856
5857/**
5858 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5859 *
5860 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5861 * Trashes EFLAGS on AMD64.
5862 */
5863DECL_HIDDEN_THROW(uint32_t)
5864iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5865{
5866# ifdef RT_ARCH_AMD64
5867 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5868
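    /* Strategy: rotate the upper 32 bits into the low half, test them, and break (int3)
       if any are set; the second rol restores the original register value. */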
5869 /* rol reg64, 32 */
5870 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5871 pbCodeBuf[off++] = 0xc1;
5872 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5873 pbCodeBuf[off++] = 32;
5874
5875 /* test reg32, ffffffffh */
5876 if (idxReg >= 8)
5877 pbCodeBuf[off++] = X86_OP_REX_B;
5878 pbCodeBuf[off++] = 0xf7;
5879 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5880 pbCodeBuf[off++] = 0xff;
5881 pbCodeBuf[off++] = 0xff;
5882 pbCodeBuf[off++] = 0xff;
5883 pbCodeBuf[off++] = 0xff;
5884
5885 /* je/jz +1 */
5886 pbCodeBuf[off++] = 0x74;
5887 pbCodeBuf[off++] = 0x01;
5888
5889 /* int3 */
5890 pbCodeBuf[off++] = 0xcc;
5891
5892 /* rol reg64, 32 */
5893 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5894 pbCodeBuf[off++] = 0xc1;
5895 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5896 pbCodeBuf[off++] = 32;
5897
5898# elif defined(RT_ARCH_ARM64)
5899 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5900 /* lsr tmp0, reg64, #32 */
5901 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5902 /* cbz tmp0, +1 */
5903 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5904 /* brk #0x1100 */
5905 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5906
5907# else
5908# error "Port me!"
5909# endif
5910 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5911 return off;
5912}
5913
5914
5915/**
5916 * Emitting code that checks that the content of register @a idxReg is the same
5917 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5918 * instruction if that's not the case.
5919 *
5920 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5921 * Trashes EFLAGS on AMD64.
5922 */
5923DECL_HIDDEN_THROW(uint32_t)
5924iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5925{
5926# ifdef RT_ARCH_AMD64
5927 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5928
5929 /* cmp reg, [mem] */
5930 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5931 {
5932 if (idxReg >= 8)
5933 pbCodeBuf[off++] = X86_OP_REX_R;
5934 pbCodeBuf[off++] = 0x38;
5935 }
5936 else
5937 {
5938 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5939 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5940 else
5941 {
5942 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5943 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5944 else
5945 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5946 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5947 if (idxReg >= 8)
5948 pbCodeBuf[off++] = X86_OP_REX_R;
5949 }
5950 pbCodeBuf[off++] = 0x39;
5951 }
5952 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5953
5954 /* je/jz +1 */
5955 pbCodeBuf[off++] = 0x74;
5956 pbCodeBuf[off++] = 0x01;
5957
5958 /* int3 */
5959 pbCodeBuf[off++] = 0xcc;
5960
5961 /* For values smaller than the register size, we must check that the rest
5962 of the register is all zeros. */
5963 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5964 {
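        /* The imm32 assembled below is 0xffffff00 for 8-bit guest fields and 0xffff0000 for
           16-bit ones; with REX.W it gets sign-extended, so the test covers every bit above
           the guest field width. */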
5965 /* test reg64, imm32 */
5966 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5967 pbCodeBuf[off++] = 0xf7;
5968 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5969 pbCodeBuf[off++] = 0;
5970 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5971 pbCodeBuf[off++] = 0xff;
5972 pbCodeBuf[off++] = 0xff;
5973
5974 /* je/jz +1 */
5975 pbCodeBuf[off++] = 0x74;
5976 pbCodeBuf[off++] = 0x01;
5977
5978 /* int3 */
5979 pbCodeBuf[off++] = 0xcc;
5980 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5981 }
5982 else
5983 {
5984 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5985 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5986 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5987 }
5988
5989# elif defined(RT_ARCH_ARM64)
5990 /* mov TMP0, [gstreg] */
5991 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5992
5993 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5994 /* sub tmp0, tmp0, idxReg */
5995 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5996 /* cbz tmp0, +1 */
5997 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5998 /* brk #0x1000+enmGstReg */
5999 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6000 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6001
6002# else
6003# error "Port me!"
6004# endif
6005 return off;
6006}
6007
6008
6009# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6010/**
6011 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6012 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6013 * instruction if that's not the case.
6014 *
6015 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6016 * Trashes EFLAGS on AMD64.
6017 */
6018DECL_HIDDEN_THROW(uint32_t)
6019iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6020 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6021{
6022    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6023 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6024 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6025 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6026 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6027 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6028 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6029 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6030 return off;
6031
6032# ifdef RT_ARCH_AMD64
6033 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
6034
6035 /* movdqa vectmp0, idxSimdReg */
6036 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6037
6038 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6039
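    /* pcmpeqq sets each 64-bit lane to all ones when it equals the value in CPUMCTX, so both
       quadwords extracted below must compare equal to -1 (the imm8 0xff is sign-extended). */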
6040 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6041 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6042 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
6043 pbCodeBuf[off++] = X86_OP_REX_R;
6044 pbCodeBuf[off++] = 0x0f;
6045 pbCodeBuf[off++] = 0x38;
6046 pbCodeBuf[off++] = 0x29;
6047 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6048
6049 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6050 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6051 pbCodeBuf[off++] = X86_OP_REX_W
6052 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6053 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6054 pbCodeBuf[off++] = 0x0f;
6055 pbCodeBuf[off++] = 0x3a;
6056 pbCodeBuf[off++] = 0x16;
6057 pbCodeBuf[off++] = 0xeb;
6058 pbCodeBuf[off++] = 0x00;
6059
6060 /* cmp tmp0, 0xffffffffffffffff. */
6061 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6062 pbCodeBuf[off++] = 0x83;
6063 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6064 pbCodeBuf[off++] = 0xff;
6065
6066 /* je/jz +1 */
6067 pbCodeBuf[off++] = 0x74;
6068 pbCodeBuf[off++] = 0x01;
6069
6070 /* int3 */
6071 pbCodeBuf[off++] = 0xcc;
6072
6073 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6074 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6075 pbCodeBuf[off++] = X86_OP_REX_W
6076 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6077 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6078 pbCodeBuf[off++] = 0x0f;
6079 pbCodeBuf[off++] = 0x3a;
6080 pbCodeBuf[off++] = 0x16;
6081 pbCodeBuf[off++] = 0xeb;
6082 pbCodeBuf[off++] = 0x01;
6083
6084 /* cmp tmp0, 0xffffffffffffffff. */
6085 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6086 pbCodeBuf[off++] = 0x83;
6087 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6088 pbCodeBuf[off++] = 0xff;
6089
6090 /* je/jz +1 */
6091 pbCodeBuf[off++] = 0x74;
6092 pbCodeBuf[off++] = 0x01;
6093
6094 /* int3 */
6095 pbCodeBuf[off++] = 0xcc;
6096
6097# elif defined(RT_ARCH_ARM64)
6098 /* mov vectmp0, [gstreg] */
6099 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6100
6101 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6102 {
6103 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6104 /* eor vectmp0, vectmp0, idxSimdReg */
6105 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6106 /* cnt vectmp0, vectmp0, #0*/
6107 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6108 /* umov tmp0, vectmp0.D[0] */
6109 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6110 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6111 /* cbz tmp0, +1 */
6112 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6113 /* brk #0x1000+enmGstReg */
6114 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6115 }
6116
6117 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6118 {
6119 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6120 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6121 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6122 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6123 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6124 /* umov tmp0, (vectmp0 + 1).D[0] */
6125 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6126 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6127 /* cbz tmp0, +1 */
6128 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6129 /* brk #0x1000+enmGstReg */
6130 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6131 }
6132
6133# else
6134# error "Port me!"
6135# endif
6136
6137 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6138 return off;
6139}
6140# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6141
6142
6143/**
6144 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6145 * important bits.
6146 *
6147 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6148 * Trashes EFLAGS on AMD64.
6149 */
6150DECL_HIDDEN_THROW(uint32_t)
6151iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6152{
6153 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6154 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6155 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6156 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6157
6158#ifdef RT_ARCH_AMD64
6159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6160
6161 /* je/jz +1 */
6162 pbCodeBuf[off++] = 0x74;
6163 pbCodeBuf[off++] = 0x01;
6164
6165 /* int3 */
6166 pbCodeBuf[off++] = 0xcc;
6167
6168# elif defined(RT_ARCH_ARM64)
6169 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6170
6171 /* b.eq +1 */
6172 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6173 /* brk #0x2000 */
6174 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6175
6176# else
6177# error "Port me!"
6178# endif
6179 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6180
6181 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6182 return off;
6183}
6184
6185#endif /* VBOX_STRICT */
6186
6187
6188#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6189/**
6190 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6191 */
6192DECL_HIDDEN_THROW(uint32_t)
6193iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6194{
6195 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6196
6197 fEflNeeded &= X86_EFL_STATUS_BITS;
6198 if (fEflNeeded)
6199 {
6200# ifdef RT_ARCH_AMD64
6201 /* test dword [pVCpu + offVCpu], imm32 */
6202 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
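        /* The byte form (0xf6, imm8) suffices when all the needed status flags live in the
           low 8 bits; otherwise the dword form (0xf7, imm32) is emitted. */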
6203 if (fEflNeeded <= 0xff)
6204 {
6205 pCodeBuf[off++] = 0xf6;
6206 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6207 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6208 }
6209 else
6210 {
6211 pCodeBuf[off++] = 0xf7;
6212 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6213 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6214 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6215 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6216 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6217 }
6218 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6219
6220# else
6221 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6222 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6223 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6224# ifdef RT_ARCH_ARM64
6225 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6226 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6227# else
6228# error "Port me!"
6229# endif
6230 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6231# endif
6232 }
6233 return off;
6234}
6235#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6236
6237
6238/**
6239 * Emits a code for checking the return code of a call and rcPassUp, returning
6240 * from the code if either are non-zero.
6241 */
6242DECL_HIDDEN_THROW(uint32_t)
6243iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6244{
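    /* The call status code and iem.s.rcPassUp are OR'ed together below so that a single
       conditional branch to the NonZeroRetOrPassUp label covers both cases; the code at
       that label sorts out which of the two applies. */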
6245#ifdef RT_ARCH_AMD64
6246 /*
6247 * AMD64: eax = call status code.
6248 */
6249
6250 /* edx = rcPassUp */
6251 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6252# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6253 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6254# endif
6255
6256 /* edx = eax | rcPassUp */
6257 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6258 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6259 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6260 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6261
6262 /* Jump to non-zero status return path. */
6263 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6264
6265 /* done. */
6266
6267#elif RT_ARCH_ARM64
6268 /*
6269 * ARM64: w0 = call status code.
6270 */
6271# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6272 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6273# endif
6274 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6275
6276 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6277
6278 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6279
6280 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6281 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6282 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6283
6284#else
6285# error "port me"
6286#endif
6287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6288 RT_NOREF_PV(idxInstr);
6289 return off;
6290}
6291
6292
6293/**
6294 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6295 * raising a \#GP(0) if it isn't.
6296 *
6297 * @returns New code buffer offset, UINT32_MAX on failure.
6298 * @param pReNative The native recompile state.
6299 * @param off The code buffer offset.
6300 * @param idxAddrReg The host register with the address to check.
6301 * @param idxInstr The current instruction.
6302 */
6303DECL_HIDDEN_THROW(uint32_t)
6304iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6305{
6306 /*
6307 * Make sure we don't have any outstanding guest register writes as we may
6308     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6309 */
6310 off = iemNativeRegFlushPendingWrites(pReNative, off);
6311
6312#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6313 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6314#else
6315 RT_NOREF(idxInstr);
6316#endif
6317
6318#ifdef RT_ARCH_AMD64
6319 /*
6320 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6321 * return raisexcpt();
6322     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6323 */
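    /* Example: 0x0000_7fff_ffff_ffff (canonical):     (0x00007fff + 0x8000) >> 16 = 0 -> no exception.
                0x0000_8000_0000_0000 (non-canonical): (0x00008000 + 0x8000) >> 16 = 1 -> #GP(0). */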
6324 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6325
6326 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6327 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6328 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6329 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6330 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6331
6332 iemNativeRegFreeTmp(pReNative, iTmpReg);
6333
6334#elif defined(RT_ARCH_ARM64)
6335 /*
6336 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6337 * return raisexcpt();
6338 * ----
6339 * mov x1, 0x800000000000
6340 * add x1, x0, x1
6341 * cmp xzr, x1, lsr 48
6342 * b.ne .Lraisexcpt
6343 */
6344 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6345
6346 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6347 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6348 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6349 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6350
6351 iemNativeRegFreeTmp(pReNative, iTmpReg);
6352
6353#else
6354# error "Port me"
6355#endif
6356 return off;
6357}
6358
6359
6360/**
6361 * Emits code to check that the content of @a idxAddrReg is within the limit
6362 * of CS, raising a \#GP(0) if it isn't.
6363 *
6364 * @returns New code buffer offset; throws VBox status code on error.
6365 * @param pReNative The native recompile state.
6366 * @param off The code buffer offset.
6367 * @param idxAddrReg The host register (32-bit) with the address to
6368 * check.
6369 * @param idxInstr The current instruction.
6370 */
6371DECL_HIDDEN_THROW(uint32_t)
6372iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6373 uint8_t idxAddrReg, uint8_t idxInstr)
6374{
6375 /*
6376 * Make sure we don't have any outstanding guest register writes as we may
6377     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6378 */
6379 off = iemNativeRegFlushPendingWrites(pReNative, off);
6380
6381#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6382 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6383#else
6384 RT_NOREF(idxInstr);
6385#endif
6386
6387 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6388 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6389 kIemNativeGstRegUse_ReadOnly);
6390
6391 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6392 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6393
6394 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6395 return off;
6396}
6397
6398
6399/**
6400 * Emits a call to a CImpl function or something similar.
6401 */
6402DECL_HIDDEN_THROW(uint32_t)
6403iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6404 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6405{
6406 /* Writeback everything. */
6407 off = iemNativeRegFlushPendingWrites(pReNative, off);
6408
6409 /*
6410     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6411 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6412 */
6413 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6414 fGstShwFlush
6415 | RT_BIT_64(kIemNativeGstReg_Pc)
6416 | RT_BIT_64(kIemNativeGstReg_EFlags));
6417 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6418
6419 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6420
6421 /*
6422 * Load the parameters.
6423 */
6424#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6425    /* Special case: the hidden VBOXSTRICTRC return pointer. */
6426 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6427 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6428 if (cAddParams > 0)
6429 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6430 if (cAddParams > 1)
6431 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6432 if (cAddParams > 2)
6433 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6434 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6435
6436#else
6437 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6438 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6439 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6440 if (cAddParams > 0)
6441 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6442 if (cAddParams > 1)
6443 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6444 if (cAddParams > 2)
6445# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6446 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6447# else
6448 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6449# endif
6450#endif
6451
6452 /*
6453 * Make the call.
6454 */
6455 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6456
6457#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6458 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6459#endif
6460
6461 /*
6462 * Check the status code.
6463 */
6464 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6465}
6466
6467
6468/**
6469 * Emits a call to a threaded worker function.
6470 */
6471DECL_HIDDEN_THROW(uint32_t)
6472iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6473{
6474 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6475
6476 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6477 off = iemNativeRegFlushPendingWrites(pReNative, off);
6478
6479 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6480 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6481
6482#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6483 /* The threaded function may throw / long jmp, so set current instruction
6484 number if we're counting. */
6485 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6486#endif
6487
6488 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6489
6490#ifdef RT_ARCH_AMD64
6491 /* Load the parameters and emit the call. */
6492# ifdef RT_OS_WINDOWS
6493# ifndef VBOXSTRICTRC_STRICT_ENABLED
6494 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6495 if (cParams > 0)
6496 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6497 if (cParams > 1)
6498 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6499 if (cParams > 2)
6500 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6501# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6502 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6503 if (cParams > 0)
6504 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6505 if (cParams > 1)
6506 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6507 if (cParams > 2)
6508 {
6509 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6510 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6511 }
6512 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6513# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6514# else
6515 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6516 if (cParams > 0)
6517 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6518 if (cParams > 1)
6519 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6520 if (cParams > 2)
6521 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6522# endif
6523
6524 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6525
6526# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6527 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6528# endif
6529
6530#elif RT_ARCH_ARM64
6531 /*
6532 * ARM64:
6533 */
6534 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6535 if (cParams > 0)
6536 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6537 if (cParams > 1)
6538 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6539 if (cParams > 2)
6540 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6541
6542 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6543
6544#else
6545# error "port me"
6546#endif
6547
6548 /*
6549 * Check the status code.
6550 */
6551 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6552
6553 return off;
6554}
6555
6556#ifdef VBOX_WITH_STATISTICS
6557/**
6558 * Emits code to update the thread call statistics.
6559 */
6560DECL_INLINE_THROW(uint32_t)
6561iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6562{
6563 /*
6564 * Update threaded function stats.
6565 */
6566 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6567 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6568# if defined(RT_ARCH_ARM64)
6569 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6570 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6571 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6572 iemNativeRegFreeTmp(pReNative, idxTmp1);
6573 iemNativeRegFreeTmp(pReNative, idxTmp2);
6574# else
6575 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6576# endif
6577 return off;
6578}
6579#endif /* VBOX_WITH_STATISTICS */
6580
6581
6582/**
6583 * Emits the code at the ReturnWithFlags label (returns
6584 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6585 */
6586static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6587{
6588 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6589 if (idxLabel != UINT32_MAX)
6590 {
6591 iemNativeLabelDefine(pReNative, idxLabel, off);
6592
6593 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6594
6595 /* jump back to the return sequence. */
6596 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6597 }
6598 return off;
6599}
6600
6601
6602/**
6603 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6604 */
6605static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6606{
6607 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6608 if (idxLabel != UINT32_MAX)
6609 {
6610 iemNativeLabelDefine(pReNative, idxLabel, off);
6611
6612 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6613
6614 /* jump back to the return sequence. */
6615 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6616 }
6617 return off;
6618}
6619
6620
6621/**
6622 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6623 */
6624static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6625{
6626 /*
6627 * Generate the rc + rcPassUp fiddling code if needed.
6628 */
6629 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6630 if (idxLabel != UINT32_MAX)
6631 {
6632 iemNativeLabelDefine(pReNative, idxLabel, off);
6633
6634 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6635#ifdef RT_ARCH_AMD64
6636# ifdef RT_OS_WINDOWS
6637# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6638 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6639# endif
6640 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6641 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6642# else
6643 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6644 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6645# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6646 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6647# endif
6648# endif
6649# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6650 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6651# endif
6652
6653#else
6654 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6655 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6656 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6657#endif
6658
6659 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6660 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6661 }
6662 return off;
6663}
6664
6665
6666/**
6667 * Emits a standard epilog.
6668 */
6669static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6670{
6671 *pidxReturnLabel = UINT32_MAX;
6672
6673 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6674 off = iemNativeRegFlushPendingWrites(pReNative, off);
6675
6676 /*
6677 * Successful return, so clear the return register (eax, w0).
6678 */
6679 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
6680
6681 /*
6682 * Define label for common return point.
6683 */
6684 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6685 *pidxReturnLabel = idxReturn;
6686
6687 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6688
6689 /*
6690 * Restore registers and return.
6691 */
6692#ifdef RT_ARCH_AMD64
6693 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6694
6695    /* Reposition rsp at the r15 restore point. */
6696 pbCodeBuf[off++] = X86_OP_REX_W;
6697 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6698 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6699 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6700
6701 /* Pop non-volatile registers and return */
6702 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6703 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6704 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6705 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6706 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6707 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6708 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6709 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6710# ifdef RT_OS_WINDOWS
6711 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6712 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6713# endif
6714 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6715 pbCodeBuf[off++] = 0xc9; /* leave */
6716 pbCodeBuf[off++] = 0xc3; /* ret */
6717 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6718
6719#elif RT_ARCH_ARM64
6720 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6721
6722 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6723 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6724 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6725 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6726 IEMNATIVE_FRAME_VAR_SIZE / 8);
6727 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6728 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6729 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6730 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6731 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6732 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6733 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6734 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6735 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6736 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6737 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6738 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6739
6740 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6741 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6742 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6743 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6744
6745 /* retab / ret */
6746# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6747 if (1)
6748 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6749 else
6750# endif
6751 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6752
6753#else
6754# error "port me"
6755#endif
6756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6757
6758 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6759}
6760
6761
6762/**
6763 * Emits a standard prolog.
6764 */
6765static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6766{
6767#ifdef RT_ARCH_AMD64
6768 /*
6769 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6770 * reserving 64 bytes for stack variables plus 4 non-register argument
6771     * slots.  Fixed register assignment: xBX = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU);
6772 *
6773 * Since we always do the same register spilling, we can use the same
6774 * unwind description for all the code.
6775 */
6776 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6777 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6778 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6779 pbCodeBuf[off++] = 0x8b;
6780 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6781 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6782 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6783# ifdef RT_OS_WINDOWS
6784 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6785 pbCodeBuf[off++] = 0x8b;
6786 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6787 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6788 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6789# else
6790 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6791 pbCodeBuf[off++] = 0x8b;
6792 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6793# endif
6794 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6795 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6796 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6797 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6798 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6799 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6800 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6801 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6802
6803# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6804 /* Save the frame pointer. */
6805 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6806# endif
6807
6808 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6809 X86_GREG_xSP,
6810 IEMNATIVE_FRAME_ALIGN_SIZE
6811 + IEMNATIVE_FRAME_VAR_SIZE
6812 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6813 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6814 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6815 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6816 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6817
6818#elif RT_ARCH_ARM64
6819 /*
6820 * We set up a stack frame exactly like on x86, only we have to push the
6821     * return address ourselves here.  We save all non-volatile registers.
6822 */
6823 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6824
6825# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
6826                      *        to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6827                      *        definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
6828                      *        in any way conditional, so just emit this instruction now and hope for the best... */
6829 /* pacibsp */
6830 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6831# endif
6832
6833 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6834 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6835 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6836 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6837 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6838 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6839 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6840 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6841 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6842 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6843 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6844 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6845 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6846 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6847 /* Save the BP and LR (ret address) registers at the top of the frame. */
6848 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6849 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6850 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6851 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6852 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6853 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6854
6855 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6856 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6857
6858 /* mov r28, r0 */
6859 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6860 /* mov r27, r1 */
6861 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6862
6863# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6864 /* Save the frame pointer. */
6865 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6866 ARMV8_A64_REG_X2);
6867# endif
6868
6869#else
6870# error "port me"
6871#endif
6872 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6873 return off;
6874}
6875
6876
6877/*********************************************************************************************************************************
6878* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6879*********************************************************************************************************************************/
6880
6881/**
6882 * Internal work that allocates a variable with kind set to
6883 * kIemNativeVarKind_Invalid and no current stack allocation.
6884 *
6885 * The kind will either be set by the caller or later when the variable is first
6886 * assigned a value.
6887 *
6888 * @returns Unpacked index.
6889 * @internal
6890 */
6891static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6892{
6893 Assert(cbType > 0 && cbType <= 64);
6894 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6895 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6896 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6897 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6898 pReNative->Core.aVars[idxVar].cbVar = cbType;
6899 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6900 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6901 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6902 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6903 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6904 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6905 pReNative->Core.aVars[idxVar].u.uValue = 0;
6906 return idxVar;
6907}
6908
6909
6910/**
6911 * Internal work that allocates an argument variable w/o setting enmKind.
6912 *
6913 * @returns Unpacked index.
6914 * @internal
6915 */
6916static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6917{
6918 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6919 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6920 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6921
6922 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6923 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6924 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6925 return idxVar;
6926}
6927
6928
6929/**
6930 * Gets the stack slot for a stack variable, allocating one if necessary.
6931 *
6932 * Calling this function implies that the stack slot will contain a valid
6933 * variable value. The caller deals with any register currently assigned to the
6934 * variable, typically by spilling it into the stack slot.
6935 *
6936 * @returns The stack slot number.
6937 * @param pReNative The recompiler state.
6938 * @param idxVar The variable.
6939 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6940 */
6941DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6942{
6943 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6944 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6945 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6946
6947 /* Already got a slot? */
6948 uint8_t const idxStackSlot = pVar->idxStackSlot;
6949 if (idxStackSlot != UINT8_MAX)
6950 {
6951 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6952 return idxStackSlot;
6953 }
6954
6955 /*
6956 * A single slot is easy to allocate.
6957 * Allocate them from the top end, closest to BP, to reduce the displacement.
6958 */
6959 if (pVar->cbVar <= sizeof(uint64_t))
6960 {
6961 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6962 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6963 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6964 pVar->idxStackSlot = (uint8_t)iSlot;
6965 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6966 return (uint8_t)iSlot;
6967 }
6968
6969 /*
6970 * We need more than one stack slot.
6971 *
6972 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6973 */
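    /* Example: a 32 byte variable needs 4 slots (fBitAllocMask=0xf) starting on a 4 slot
       boundary (fBitAlignMask=3); the loop below skips candidate positions that are either
       misaligned or partially occupied. */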
6974 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6975 Assert(pVar->cbVar <= 64);
6976 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6977 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6978 uint32_t bmStack = ~pReNative->Core.bmStack;
6979 while (bmStack != UINT32_MAX)
6980 {
6981/** @todo allocate from the top to reduce BP displacement. */
6982 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6983 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6984 if (!(iSlot & fBitAlignMask))
6985 {
6986 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6987 {
6988 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6989 pVar->idxStackSlot = (uint8_t)iSlot;
6990 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6991 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6992 return (uint8_t)iSlot;
6993 }
6994 }
6995 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6996 }
6997 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6998}
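/*
 * Worked example (illustrative, not from the original source) for the
 * multi-slot path above, taking a 32 byte variable:
 *
 *      fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3
 *      fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1            = RT_BIT_32(4) - 1 = 0xf
 *
 * So four consecutive slots are needed, starting at a slot index that is a
 * multiple of four, which matches the "32 -> 3" entry in the comment above.
 */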
6999
7000
7001/**
7002 * Changes the variable to a stack variable.
7003 *
7004 * Currently this is only possible to do the first time the variable is used;
7005 * switching the kind later can be implemented but hasn't been done.
7006 *
7007 * @param pReNative The recompiler state.
7008 * @param idxVar The variable.
7009 * @throws VERR_IEM_VAR_IPE_2
7010 */
7011DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7012{
7013 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7014 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7015 if (pVar->enmKind != kIemNativeVarKind_Stack)
7016 {
7017 /* We could in theory transition from immediate to stack as well, but it
7018 would involve the caller doing work storing the value on the stack. So,
7019 till that's required we only allow transition from invalid. */
7020 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7021 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7022 pVar->enmKind = kIemNativeVarKind_Stack;
7023
7024 /* Note! We don't allocate a stack slot here, that's only done when a
7025 slot is actually needed to hold a variable value. */
7026 }
7027}
7028
7029
7030/**
7031 * Changes the variable to a constant (immediate) value.
7032 *
7033 * This does not require stack storage as we know the value and can always
7034 * reload it, unless of course it's referenced.
7035 *
7036 * @param pReNative The recompiler state.
7037 * @param idxVar The variable.
7038 * @param uValue The immediate value.
7039 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7040 */
7041DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7042{
7043 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7044 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7045 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7046 {
7047 /* Only simple transitions for now. */
7048 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7049 pVar->enmKind = kIemNativeVarKind_Immediate;
7050 }
7051 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7052
7053 pVar->u.uValue = uValue;
7054 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7055 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7056 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7057}
7058
7059
7060/**
7061 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7062 *
7063 * This does not require stack storage as we know the value and can always
7064 * reload it. Loading is postponed till needed.
7065 *
7066 * @param pReNative The recompiler state.
7067 * @param idxVar The variable. Unpacked.
7068 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7069 *
7070 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7071 * @internal
7072 */
7073static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7074{
7075 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7076 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7077
7078 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7079 {
7080 /* Only simple transitions for now. */
7081 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7082 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7083 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7084 }
7085 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7086
7087 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7088
7089 /* Update the other variable, ensure it's a stack variable. */
7090 /** @todo handle variables with const values... that'll go boom now. */
7091 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7092 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7093}
7094
7095
7096/**
7097 * Sets the variable to a reference (pointer) to a guest register reference.
7098 *
7099 * This does not require stack storage as we know the value and can always
7100 * reload it. Loading is postponed till needed.
7101 *
7102 * @param pReNative The recompiler state.
7103 * @param idxVar The variable.
7104 * @param enmRegClass The class of guest registers to reference.
7105 * @param idxReg The register within @a enmRegClass to reference.
7106 *
7107 * @throws VERR_IEM_VAR_IPE_2
7108 */
7109DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7110 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7111{
7112 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7113 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7114
7115 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7116 {
7117 /* Only simple transitions for now. */
7118 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7119 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7120 }
7121 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7122
7123 pVar->u.GstRegRef.enmClass = enmRegClass;
7124 pVar->u.GstRegRef.idx = idxReg;
7125}
7126
7127
7128DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7129{
7130 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7131}
7132
7133
7134DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7135{
7136 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7137
7138 /* Since we're using a generic uint64_t value type, we must truncate it if
7139 the variable is smaller, otherwise we may end up with a too large value when
7140 scaling up an imm8 w/ sign-extension.
7141
7142 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7143 in the bios, bx=1) when running on arm, because clang expects 16-bit
7144 register parameters to have bits 16 and up set to zero. Instead of
7145 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7146 CF value in the result. */
7147 switch (cbType)
7148 {
7149 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7150 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7151 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7152 }
7153 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7154 return idxVar;
7155}
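/*
 * Illustrative sketch (not from the original source): the truncation above is
 * what keeps a sign-extended imm8 from leaking into the upper bits of a 16-bit
 * argument, e.g. for the "add bx, 0xffff" case mentioned above:
 *
 *      uint64_t uValue = UINT64_C(0xffffffffffffffff);     - imm8 -1 sign-extended to 64 bits
 *      uValue &= UINT64_C(0xffff);                         - cbType == sizeof(uint16_t) -> 0xffff
 *
 * which is the zero-extended 16-bit value the native calling convention expects.
 */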
7156
7157
7158DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7159{
7160 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7161 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7162 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7163 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7164 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7165 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7166
7167 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7168 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7169 return idxArgVar;
7170}
7171
7172
7173DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7174{
7175 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7176 /* Don't set to stack now, leave that to the first use as for instance
7177 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7178 return idxVar;
7179}
7180
7181
7182DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7183{
7184 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7185
7186 /* Since we're using a generic uint64_t value type, we must truncate it if
7187 the variable is smaller, otherwise we may end up with a too large value when
7188 scaling up an imm8 w/ sign-extension. */
7189 switch (cbType)
7190 {
7191 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7192 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7193 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7194 }
7195 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7196 return idxVar;
7197}
7198
7199
7200/**
7201 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7202 * fixed till we call iemNativeVarRegisterRelease.
7203 *
7204 * @returns The host register number.
7205 * @param pReNative The recompiler state.
7206 * @param idxVar The variable.
7207 * @param poff Pointer to the instruction buffer offset.
7208 * In case a register needs to be freed up or the value
7209 * loaded off the stack.
7210 * @param fInitialized Set if the variable must already have been initialized.
7211 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7212 * the case.
7213 * @param idxRegPref Preferred register number or UINT8_MAX.
7214 */
7215DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7216 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7217{
7218 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7219 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7220 Assert(pVar->cbVar <= 8);
7221 Assert(!pVar->fRegAcquired);
7222
7223 uint8_t idxReg = pVar->idxReg;
7224 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7225 {
7226 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7227 && pVar->enmKind < kIemNativeVarKind_End);
7228 pVar->fRegAcquired = true;
7229 return idxReg;
7230 }
7231
7232 /*
7233 * If the kind of variable has not yet been set, default to 'stack'.
7234 */
7235 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7236 && pVar->enmKind < kIemNativeVarKind_End);
7237 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7238 iemNativeVarSetKindToStack(pReNative, idxVar);
7239
7240 /*
7241 * We have to allocate a register for the variable, even if it's a stack one,
7242 * as we don't know if there are modifications being made to it before it's
7243 * finalized (todo: analyze and insert hints about that?).
7244 *
7245 * If we can, we try to get the correct register for argument variables. This
7246 * assumes that most argument variables are fetched as close as possible
7247 * to the actual call, so that there aren't any interfering hidden calls
7248 * (memory accesses, etc) in between.
7249 *
7250 * If we cannot, or it's a plain (non-argument) variable, we make sure no
7251 * argument registers that will be used by this MC block are allocated here,
7252 * and we always prefer non-volatile registers to avoid having to spill
7253 * stuff for internal calls.
7254 */
7255 /** @todo Have the python script detect too early argument value fetches and
7256 * warn about hidden calls causing less optimal code to be generated. */
7257
7258 uint8_t const uArgNo = pVar->uArgNo;
7259 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7260 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7261 {
7262 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7263 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7264 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7265 }
7266 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7267 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7268 {
7269 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7270 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7271 & ~pReNative->Core.bmHstRegsWithGstShadow
7272 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7273 & fNotArgsMask;
7274 if (fRegs)
7275 {
7276 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7277 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7278 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7279 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7280 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7281 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7282 }
7283 else
7284 {
7285 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7286 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7287 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7288 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7289 }
7290 }
7291 else
7292 {
7293 idxReg = idxRegPref;
7294 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7295 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7296 }
7297 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7298 pVar->idxReg = idxReg;
7299
7300 /*
7301 * Load it off the stack if we've got a stack slot.
7302 */
7303 uint8_t const idxStackSlot = pVar->idxStackSlot;
7304 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7305 {
7306 Assert(fInitialized);
7307 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7308 switch (pVar->cbVar)
7309 {
7310 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7311 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7312 case 3: AssertFailed(); RT_FALL_THRU();
7313 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7314 default: AssertFailed(); RT_FALL_THRU();
7315 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7316 }
7317 }
7318 else
7319 {
7320 Assert(idxStackSlot == UINT8_MAX);
7321 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7322 }
7323 pVar->fRegAcquired = true;
7324 return idxReg;
7325}
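/*
 * Illustrative usage sketch (assumed caller pattern, not from the original
 * source): a typical emitter acquires the register, emits code using it and
 * then releases it again; iemNativeVarRegisterRelease is the counterpart
 * mentioned in the description above and its exact signature is assumed here:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true);
 *      ... emit instructions reading/modifying idxVarReg, advancing off ...
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 */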
7326
7327
7328/**
7329 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7330 * guest register.
7331 *
7332 * This function makes sure there is a register for it and sets it to be the
7333 * current shadow copy of @a enmGstReg.
7334 *
7335 * @returns The host register number.
7336 * @param pReNative The recompiler state.
7337 * @param idxVar The variable.
7338 * @param enmGstReg The guest register this variable will be written to
7339 * after this call.
7340 * @param poff Pointer to the instruction buffer offset.
7341 * In case a register needs to be freed up or if the
7342 * variable content needs to be loaded off the stack.
7343 *
7344 * @note We DO NOT expect @a idxVar to be an argument variable,
7345 * because this function is only used in the commit stage of an
7346 * instruction.
7347 */
7348DECL_HIDDEN_THROW(uint8_t)
7349iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7350{
7351 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7352 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7353 Assert(!pVar->fRegAcquired);
7354 AssertMsgStmt( pVar->cbVar <= 8
7355 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7356 || pVar->enmKind == kIemNativeVarKind_Stack),
7357 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7358 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7360
7361 /*
7362 * This shouldn't ever be used for arguments, unless it's in a weird else
7363 * branch that doesn't do any calling and even then it's questionable.
7364 *
7365 * However, in case someone writes crazy wrong MC code and does register
7366 * updates before making calls, just use the regular register allocator to
7367 * ensure we get a register suitable for the intended argument number.
7368 */
7369 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7370
7371 /*
7372 * If there is already a register for the variable, we transfer/set the
7373 * guest shadow copy assignment to it.
7374 */
7375 uint8_t idxReg = pVar->idxReg;
7376 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7377 {
7378 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7379 {
7380 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7381 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7382 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7383 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7384 }
7385 else
7386 {
7387 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7388 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7389 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7390 }
7391 /** @todo figure this one out. We need some way of making sure the register isn't
7392 * modified after this point, just in case we start writing crappy MC code. */
7393 pVar->enmGstReg = enmGstReg;
7394 pVar->fRegAcquired = true;
7395 return idxReg;
7396 }
7397 Assert(pVar->uArgNo == UINT8_MAX);
7398
7399 /*
7400 * Because this is supposed to be the commit stage, we just tag along with the
7401 * temporary register allocator and upgrade it to a variable register.
7402 */
7403 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7404 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7405 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7406 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7407 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7408 pVar->idxReg = idxReg;
7409
7410 /*
7411 * Now we need to load the register value.
7412 */
7413 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7414 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7415 else
7416 {
7417 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7418 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7419 switch (pVar->cbVar)
7420 {
7421 case sizeof(uint64_t):
7422 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7423 break;
7424 case sizeof(uint32_t):
7425 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7426 break;
7427 case sizeof(uint16_t):
7428 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7429 break;
7430 case sizeof(uint8_t):
7431 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7432 break;
7433 default:
7434 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7435 }
7436 }
7437
7438 pVar->fRegAcquired = true;
7439 return idxReg;
7440}
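/*
 * Illustrative sketch (assumed caller pattern, not from the original source):
 * at the commit stage of an instruction doing a full write of guest register
 * enmGstReg, an emitter would do something along these lines:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar, enmGstReg, &off);
 *      ... emit the store of idxVarReg into the corresponding CPUMCTX field ...
 *      iemNativeVarRegisterRelease(pReNative, idxVar);    - assumed counterpart, see above
 */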
7441
7442
7443/**
7444 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7445 *
7446 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7447 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7448 * requirement of flushing anything in volatile host registers when making a
7449 * call.
7450 *
7451 * @returns New @a off value.
7452 * @param pReNative The recompiler state.
7453 * @param off The code buffer position.
7454 * @param fHstRegsNotToSave Set of registers not to save & restore.
7455 */
7456DECL_HIDDEN_THROW(uint32_t)
7457iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7458{
7459 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7460 if (fHstRegs)
7461 {
7462 do
7463 {
7464 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7465 fHstRegs &= ~RT_BIT_32(idxHstReg);
7466
7467 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7468 {
7469 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7470 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7471 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7472 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7473 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7474 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7475 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7476 {
7477 case kIemNativeVarKind_Stack:
7478 {
7479 /* Temporarily spill the variable register. */
7480 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7481 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7482 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7483 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7484 continue;
7485 }
7486
7487 case kIemNativeVarKind_Immediate:
7488 case kIemNativeVarKind_VarRef:
7489 case kIemNativeVarKind_GstRegRef:
7490 /* It is weird to have any of these loaded at this point. */
7491 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7492 continue;
7493
7494 case kIemNativeVarKind_End:
7495 case kIemNativeVarKind_Invalid:
7496 break;
7497 }
7498 AssertFailed();
7499 }
7500 else
7501 {
7502 /*
7503 * Allocate a temporary stack slot and spill the register to it.
7504 */
7505 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7506 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7507 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7508 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7509 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7510 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7511 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7512 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7513 }
7514 } while (fHstRegs);
7515 }
7516 return off;
7517}
7518
7519
7520/**
7521 * Emit code to restore volatile registers after a call to a helper.
7522 *
7523 * @returns New @a off value.
7524 * @param pReNative The recompiler state.
7525 * @param off The code buffer position.
7526 * @param fHstRegsNotToSave Set of registers not to save & restore.
7527 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7528 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7529 */
7530DECL_HIDDEN_THROW(uint32_t)
7531iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7532{
7533 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7534 if (fHstRegs)
7535 {
7536 do
7537 {
7538 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7539 fHstRegs &= ~RT_BIT_32(idxHstReg);
7540
7541 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7542 {
7543 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7544 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7545 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7546 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7547 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7548 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7549 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7550 {
7551 case kIemNativeVarKind_Stack:
7552 {
7553 /* Unspill the variable register. */
7554 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7555 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7556 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7557 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7558 continue;
7559 }
7560
7561 case kIemNativeVarKind_Immediate:
7562 case kIemNativeVarKind_VarRef:
7563 case kIemNativeVarKind_GstRegRef:
7564 /* It is weird to have any of these loaded at this point. */
7565 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7566 continue;
7567
7568 case kIemNativeVarKind_End:
7569 case kIemNativeVarKind_Invalid:
7570 break;
7571 }
7572 AssertFailed();
7573 }
7574 else
7575 {
7576 /*
7577 * Restore from temporary stack slot.
7578 */
7579 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7580 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7581 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7582 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7583
7584 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7585 }
7586 } while (fHstRegs);
7587 }
7588 return off;
7589}
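/*
 * Illustrative sketch (assumed caller pattern, not from the original source):
 * the save/restore pair above is meant to bracket a helper call on a TLB miss
 * path, roughly:
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      ... set up arguments and emit the actual call to the helper ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 *
 * optionally followed by iemNativeRegRestoreGuestShadowsInVolatileRegs() as
 * noted in the description of the save function.
 */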
7590
7591
7592/**
7593 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7594 *
7595 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7596 *
7597 * ASSUMES that @a idxVar is valid and unpacked.
7598 */
7599DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7600{
7601 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7602 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7603 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7604 {
7605 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7606 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7607 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7608 Assert(cSlots > 0);
7609 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7610 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7611 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7612 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7613 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7614 }
7615 else
7616 Assert(idxStackSlot == UINT8_MAX);
7617}
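/*
 * Worked example (illustrative, not from the original source) for the slot
 * freeing above, taking a 16 byte variable sitting in slot 4:
 *
 *      cSlots     = (16 + 8 - 1) / 8   = 2
 *      fAllocMask = RT_BIT_32(2) - 1   = 0x3
 *      bmStack   &= ~(0x3 << 4)        - clears slots 4 and 5
 */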
7618
7619
7620/**
7621 * Worker that frees a single variable.
7622 *
7623 * ASSUMES that @a idxVar is valid and unpacked.
7624 */
7625DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7626{
7627 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7628 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7629 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7630
7631 /* Free the host register first if any assigned. */
7632 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7633 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7634 {
7635 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7636 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7637 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7638 }
7639
7640 /* Free argument mapping. */
7641 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7642 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7643 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7644
7645 /* Free the stack slots. */
7646 iemNativeVarFreeStackSlots(pReNative, idxVar);
7647
7648 /* Free the actual variable. */
7649 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7650 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7651}
7652
7653
7654/**
7655 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7656 */
7657DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7658{
7659 while (bmVars != 0)
7660 {
7661 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7662 bmVars &= ~RT_BIT_32(idxVar);
7663
7664#if 1 /** @todo optimize by simplifying this later... */
7665 iemNativeVarFreeOneWorker(pReNative, idxVar);
7666#else
7667 /* Only need to free the host register, the rest is done as bulk updates below. */
7668 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7669 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7670 {
7671 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7672 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7673 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7674 }
7675#endif
7676 }
7677#if 0 /** @todo optimize by simplifying this later... */
7678 pReNative->Core.bmVars = 0;
7679 pReNative->Core.bmStack = 0;
7680 pReNative->Core.u64ArgVars = UINT64_MAX;
7681#endif
7682}
7683
7684
7685
7686/*********************************************************************************************************************************
7687* Emitters for IEM_MC_CALL_CIMPL_XXX *
7688*********************************************************************************************************************************/
7689
7690/**
7691 * Emits code to load a reference to the given guest register into @a idxGprDst.
7692 */
7693DECL_HIDDEN_THROW(uint32_t)
7694iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7695 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7696{
7697#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7698 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
7699#endif
7700
7701 /*
7702 * Get the offset relative to the CPUMCTX structure.
7703 */
7704 uint32_t offCpumCtx;
7705 switch (enmClass)
7706 {
7707 case kIemNativeGstRegRef_Gpr:
7708 Assert(idxRegInClass < 16);
7709 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7710 break;
7711
7712 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7713 Assert(idxRegInClass < 4);
7714 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7715 break;
7716
7717 case kIemNativeGstRegRef_EFlags:
7718 Assert(idxRegInClass == 0);
7719 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7720 break;
7721
7722 case kIemNativeGstRegRef_MxCsr:
7723 Assert(idxRegInClass == 0);
7724 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7725 break;
7726
7727 case kIemNativeGstRegRef_FpuReg:
7728 Assert(idxRegInClass < 8);
7729 AssertFailed(); /** @todo what kind of indexing? */
7730 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7731 break;
7732
7733 case kIemNativeGstRegRef_MReg:
7734 Assert(idxRegInClass < 8);
7735 AssertFailed(); /** @todo what kind of indexing? */
7736 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7737 break;
7738
7739 case kIemNativeGstRegRef_XReg:
7740 Assert(idxRegInClass < 16);
7741 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7742 break;
7743
7744 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7745 Assert(idxRegInClass == 0);
7746 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7747 break;
7748
7749 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7750 Assert(idxRegInClass == 0);
7751 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7752 break;
7753
7754 default:
7755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7756 }
7757
7758 /*
7759 * Load the value into the destination register.
7760 */
7761#ifdef RT_ARCH_AMD64
7762 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7763
7764#elif defined(RT_ARCH_ARM64)
7765 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7766 Assert(offCpumCtx < 4096);
7767 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7768
7769#else
7770# error "Port me!"
7771#endif
7772
7773 return off;
7774}
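/*
 * Illustrative note (not from the original source): for a GPR reference the
 * address loaded into idxGprDst works out to
 *
 *      &pVCpu->cpum.GstCtx.aGRegs[idxRegInClass]
 *
 * On AMD64 this is done as an LEA relative to pVCpu using the CPUMCTX offset
 * plus RT_UOFFSETOF(VMCPUCC, cpum.GstCtx), while on ARM64 the CPUMCTX-relative
 * offset is simply added to IEMNATIVE_REG_FIXED_PCPUMCTX, which already points
 * at cpum.GstCtx.
 */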
7775
7776
7777/**
7778 * Common code for CIMPL and AIMPL calls.
7779 *
7780 * These are calls that use argument variables and such. They should not be
7781 * confused with internal calls required to implement an MC operation,
7782 * like a TLB load and similar.
7783 *
7784 * Upon return all that is left to do is to load any hidden arguments and
7785 * perform the call. All argument variables are freed.
7786 *
7787 * @returns New code buffer offset; throws VBox status code on error.
7788 * @param pReNative The native recompile state.
7789 * @param off The code buffer offset.
7790 * @param cArgs The total number of arguments (includes hidden
7791 * count).
7792 * @param cHiddenArgs The number of hidden arguments. The hidden
7793 * arguments must not have any variable declared for
7794 * them, whereas all the regular arguments must
7795 * (tstIEMCheckMc ensures this).
7796 */
7797DECL_HIDDEN_THROW(uint32_t)
7798iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7799{
7800#ifdef VBOX_STRICT
7801 /*
7802 * Assert sanity.
7803 */
7804 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7805 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7806 for (unsigned i = 0; i < cHiddenArgs; i++)
7807 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7808 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7809 {
7810 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7811 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7812 }
7813 iemNativeRegAssertSanity(pReNative);
7814#endif
7815
7816 /* We don't know what the called function makes use of, so flush any pending register writes. */
7817 off = iemNativeRegFlushPendingWrites(pReNative, off);
7818
7819 /*
7820 * Before we do anything else, go over variables that are referenced and
7821 * make sure they are not in a register.
7822 */
7823 uint32_t bmVars = pReNative->Core.bmVars;
7824 if (bmVars)
7825 {
7826 do
7827 {
7828 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7829 bmVars &= ~RT_BIT_32(idxVar);
7830
7831 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7832 {
7833 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7834 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7835 {
7836 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7837 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7838 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7839 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7840 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7841
7842 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7843 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7844 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7845 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7846 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7847 }
7848 }
7849 } while (bmVars != 0);
7850#if 0 //def VBOX_STRICT
7851 iemNativeRegAssertSanity(pReNative);
7852#endif
7853 }
7854
7855 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7856
7857 /*
7858 * First, go over the host registers that will be used for arguments and make
7859 * sure they either hold the desired argument or are free.
7860 */
7861 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7862 {
7863 for (uint32_t i = 0; i < cRegArgs; i++)
7864 {
7865 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7866 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7867 {
7868 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7869 {
7870 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7872 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7873 Assert(pVar->idxReg == idxArgReg);
7874 uint8_t const uArgNo = pVar->uArgNo;
7875 if (uArgNo == i)
7876 { /* perfect */ }
7877 /* The variable allocator logic should make sure this is impossible,
7878 except for when the return register is used as a parameter (ARM,
7879 but not x86). */
7880#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7881 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7882 {
7883# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7884# error "Implement this"
7885# endif
7886 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7887 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7888 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7889 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7890 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7891 }
7892#endif
7893 else
7894 {
7895 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7896
7897 if (pVar->enmKind == kIemNativeVarKind_Stack)
7898 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7899 else
7900 {
7901 /* just free it, can be reloaded if used again */
7902 pVar->idxReg = UINT8_MAX;
7903 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7904 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7905 }
7906 }
7907 }
7908 else
7909 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7910 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7911 }
7912 }
7913#if 0 //def VBOX_STRICT
7914 iemNativeRegAssertSanity(pReNative);
7915#endif
7916 }
7917
7918 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7919
7920#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7921 /*
7922 * If there are any stack arguments, make sure they are in their place as well.
7923 *
7924 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7925 * the caller) will be loading it later and it must be free (see first loop).
7926 */
7927 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7928 {
7929 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7930 {
7931 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7932 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7933 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7934 {
7935 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7936 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7937 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7938 pVar->idxReg = UINT8_MAX;
7939 }
7940 else
7941 {
7942 /* Use ARG0 as temp for stuff we need registers for. */
7943 switch (pVar->enmKind)
7944 {
7945 case kIemNativeVarKind_Stack:
7946 {
7947 uint8_t const idxStackSlot = pVar->idxStackSlot;
7948 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7949 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7950 iemNativeStackCalcBpDisp(idxStackSlot));
7951 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7952 continue;
7953 }
7954
7955 case kIemNativeVarKind_Immediate:
7956 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7957 continue;
7958
7959 case kIemNativeVarKind_VarRef:
7960 {
7961 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7962 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7963 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7964 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7965 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7966 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7967 {
7968 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7969 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7970 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7971 }
7972 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7973 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7974 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7975 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7976 continue;
7977 }
7978
7979 case kIemNativeVarKind_GstRegRef:
7980 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7981 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
7982 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7983 continue;
7984
7985 case kIemNativeVarKind_Invalid:
7986 case kIemNativeVarKind_End:
7987 break;
7988 }
7989 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7990 }
7991 }
7992# if 0 //def VBOX_STRICT
7993 iemNativeRegAssertSanity(pReNative);
7994# endif
7995 }
7996#else
7997 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7998#endif
7999
8000 /*
8001 * Make sure the argument variables are loaded into their respective registers.
8002 *
8003 * We can optimize this by ASSUMING that any register allocations are for
8004 * registers that have already been loaded and are ready. The previous step
8005 * saw to that.
8006 */
8007 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8008 {
8009 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8010 {
8011 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8012 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8013 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8014 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8015 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8016 else
8017 {
8018 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8019 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8020 {
8021 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8022 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8023 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8024 | RT_BIT_32(idxArgReg);
8025 pVar->idxReg = idxArgReg;
8026 }
8027 else
8028 {
8029 /* Use ARG0 as temp for stuff we need registers for. */
8030 switch (pVar->enmKind)
8031 {
8032 case kIemNativeVarKind_Stack:
8033 {
8034 uint8_t const idxStackSlot = pVar->idxStackSlot;
8035 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8036 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8037 continue;
8038 }
8039
8040 case kIemNativeVarKind_Immediate:
8041 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8042 continue;
8043
8044 case kIemNativeVarKind_VarRef:
8045 {
8046 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8047 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8048 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8049 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8050 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8051 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8052 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8053 {
8054 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8055 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8056 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8057 }
8058 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8059 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8060 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8061 continue;
8062 }
8063
8064 case kIemNativeVarKind_GstRegRef:
8065 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8066 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8067 continue;
8068
8069 case kIemNativeVarKind_Invalid:
8070 case kIemNativeVarKind_End:
8071 break;
8072 }
8073 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8074 }
8075 }
8076 }
8077#if 0 //def VBOX_STRICT
8078 iemNativeRegAssertSanity(pReNative);
8079#endif
8080 }
8081#ifdef VBOX_STRICT
8082 else
8083 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8084 {
8085 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8086 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8087 }
8088#endif
8089
8090 /*
8091 * Free all argument variables (simplified).
8092 * Their lifetime always expires with the call they are for.
8093 */
8094 /** @todo Make the python script check that arguments aren't used after
8095 * IEM_MC_CALL_XXXX. */
8096 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8097 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8098 * an argument value. There is also some FPU stuff. */
8099 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8100 {
8101 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8102 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8103
8104 /* no need to free registers: */
8105 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8106 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8107 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8108 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8109 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8110 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8111
8112 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8113 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8114 iemNativeVarFreeStackSlots(pReNative, idxVar);
8115 }
8116 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8117
8118 /*
8119 * Flush volatile registers as we make the call.
8120 */
8121 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8122
8123 return off;
8124}
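/*
 * Illustrative sketch (assumed caller pattern, not from the original source):
 * a CIMPL-style emitter with one hidden argument (assumed to be the pVCpu
 * pointer) and two regular arguments would use this worker roughly like so:
 *
 *      off = iemNativeEmitCallCommon(pReNative, off, 3, 1);
 *      ... load hidden argument 0 (pVCpu) into IEMNATIVE_CALL_ARG0_GREG ...
 *      ... emit the actual call to the C implementation function ...
 *
 * as per the function description, only the hidden arguments and the call
 * itself remain to be emitted upon return.
 */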
8125
8126
8127
8128/*********************************************************************************************************************************
8129* TLB Lookup. *
8130*********************************************************************************************************************************/
8131
8132/**
8133 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8134 */
8135DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8136{
8137 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8138 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8139 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8140 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8141
8142 /* Do the lookup manually. */
8143 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8144 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8145 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8146 if (RT_LIKELY(pTlbe->uTag == uTag))
8147 {
8148 /*
8149 * Check TLB page table level access flags.
8150 */
8151 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8152 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8153 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8154 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8155 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8156 | IEMTLBE_F_PG_UNASSIGNED
8157 | IEMTLBE_F_PT_NO_ACCESSED
8158 | fNoWriteNoDirty | fNoUser);
8159 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8160 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8161 {
8162 /*
8163 * Return the address.
8164 */
8165 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8166 if ((uintptr_t)pbAddr == uResult)
8167 return;
8168 RT_NOREF(cbMem);
8169 AssertFailed();
8170 }
8171 else
8172 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8173 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8174 }
8175 else
8176 AssertFailed();
8177 RT_BREAKPOINT();
8178}
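/*
 * Illustrative note (not from the original source): judging from the decoding
 * at the top of the function, a caller packs the last parameter as
 *
 *      uSegAndSizeAndAccess = iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16);
 *
 * with iSegReg == UINT8_MAX meaning the address is already flat.
 */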
8179
8180/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8181
8182
8183
8184/*********************************************************************************************************************************
8185* Recompiler Core. *
8186*********************************************************************************************************************************/
8187
8188/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8189static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8190{
8191 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8192 pDis->cbCachedInstr += cbMaxRead;
8193 RT_NOREF(cbMinRead);
8194 return VERR_NO_DATA;
8195}
8196
8197
8198DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8199{
8200 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8201 {
8202#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8203 ENTRY(fLocalForcedActions),
8204 ENTRY(iem.s.rcPassUp),
8205 ENTRY(iem.s.fExec),
8206 ENTRY(iem.s.pbInstrBuf),
8207 ENTRY(iem.s.uInstrBufPc),
8208 ENTRY(iem.s.GCPhysInstrBuf),
8209 ENTRY(iem.s.cbInstrBufTotal),
8210 ENTRY(iem.s.idxTbCurInstr),
8211#ifdef VBOX_WITH_STATISTICS
8212 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8213 ENTRY(iem.s.StatNativeTlbHitsForStore),
8214 ENTRY(iem.s.StatNativeTlbHitsForStack),
8215 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8216 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8217 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8218 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8219 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8220#endif
8221 ENTRY(iem.s.DataTlb.aEntries),
8222 ENTRY(iem.s.DataTlb.uTlbRevision),
8223 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8224 ENTRY(iem.s.DataTlb.cTlbHits),
8225 ENTRY(iem.s.CodeTlb.aEntries),
8226 ENTRY(iem.s.CodeTlb.uTlbRevision),
8227 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8228 ENTRY(iem.s.CodeTlb.cTlbHits),
8229 ENTRY(pVMR3),
8230 ENTRY(cpum.GstCtx.rax),
8231 ENTRY(cpum.GstCtx.ah),
8232 ENTRY(cpum.GstCtx.rcx),
8233 ENTRY(cpum.GstCtx.ch),
8234 ENTRY(cpum.GstCtx.rdx),
8235 ENTRY(cpum.GstCtx.dh),
8236 ENTRY(cpum.GstCtx.rbx),
8237 ENTRY(cpum.GstCtx.bh),
8238 ENTRY(cpum.GstCtx.rsp),
8239 ENTRY(cpum.GstCtx.rbp),
8240 ENTRY(cpum.GstCtx.rsi),
8241 ENTRY(cpum.GstCtx.rdi),
8242 ENTRY(cpum.GstCtx.r8),
8243 ENTRY(cpum.GstCtx.r9),
8244 ENTRY(cpum.GstCtx.r10),
8245 ENTRY(cpum.GstCtx.r11),
8246 ENTRY(cpum.GstCtx.r12),
8247 ENTRY(cpum.GstCtx.r13),
8248 ENTRY(cpum.GstCtx.r14),
8249 ENTRY(cpum.GstCtx.r15),
8250 ENTRY(cpum.GstCtx.es.Sel),
8251 ENTRY(cpum.GstCtx.es.u64Base),
8252 ENTRY(cpum.GstCtx.es.u32Limit),
8253 ENTRY(cpum.GstCtx.es.Attr),
8254 ENTRY(cpum.GstCtx.cs.Sel),
8255 ENTRY(cpum.GstCtx.cs.u64Base),
8256 ENTRY(cpum.GstCtx.cs.u32Limit),
8257 ENTRY(cpum.GstCtx.cs.Attr),
8258 ENTRY(cpum.GstCtx.ss.Sel),
8259 ENTRY(cpum.GstCtx.ss.u64Base),
8260 ENTRY(cpum.GstCtx.ss.u32Limit),
8261 ENTRY(cpum.GstCtx.ss.Attr),
8262 ENTRY(cpum.GstCtx.ds.Sel),
8263 ENTRY(cpum.GstCtx.ds.u64Base),
8264 ENTRY(cpum.GstCtx.ds.u32Limit),
8265 ENTRY(cpum.GstCtx.ds.Attr),
8266 ENTRY(cpum.GstCtx.fs.Sel),
8267 ENTRY(cpum.GstCtx.fs.u64Base),
8268 ENTRY(cpum.GstCtx.fs.u32Limit),
8269 ENTRY(cpum.GstCtx.fs.Attr),
8270 ENTRY(cpum.GstCtx.gs.Sel),
8271 ENTRY(cpum.GstCtx.gs.u64Base),
8272 ENTRY(cpum.GstCtx.gs.u32Limit),
8273 ENTRY(cpum.GstCtx.gs.Attr),
8274 ENTRY(cpum.GstCtx.rip),
8275 ENTRY(cpum.GstCtx.eflags),
8276 ENTRY(cpum.GstCtx.uRipInhibitInt),
8277 ENTRY(cpum.GstCtx.cr0),
8278 ENTRY(cpum.GstCtx.cr4),
8279 ENTRY(cpum.GstCtx.aXcr[0]),
8280 ENTRY(cpum.GstCtx.aXcr[1]),
8281#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8282 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8283 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8284 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8285 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8286 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8287 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8288 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8289 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8290 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8291 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8292 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8293 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8294 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8295 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8296 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8297 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8298 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8299 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8300 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8301 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8302 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8303 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8304 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8305 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8306 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8307 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8308 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8309 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8310 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8311 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8312 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8313 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8314#endif
8315#undef ENTRY
8316 };
8317#ifdef VBOX_STRICT
8318 static bool s_fOrderChecked = false;
8319 if (!s_fOrderChecked)
8320 {
8321 s_fOrderChecked = true;
8322 uint32_t offPrev = s_aMembers[0].off;
8323 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8324 {
8325 Assert(s_aMembers[i].off > offPrev);
8326 offPrev = s_aMembers[i].off;
8327 }
8328 }
8329#endif
8330
8331 /*
8332 * Binary lookup.
8333 */
8334 unsigned iStart = 0;
8335 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8336 for (;;)
8337 {
8338 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8339 uint32_t const offCur = s_aMembers[iCur].off;
8340 if (off < offCur)
8341 {
8342 if (iCur != iStart)
8343 iEnd = iCur;
8344 else
8345 break;
8346 }
8347 else if (off > offCur)
8348 {
8349 if (iCur + 1 < iEnd)
8350 iStart = iCur + 1;
8351 else
8352 break;
8353 }
8354 else
8355 return s_aMembers[iCur].pszName;
8356 }
8357#ifdef VBOX_WITH_STATISTICS
8358 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8359 return "iem.s.acThreadedFuncStats[iFn]";
8360#endif
8361 return NULL;
8362}
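
/*
 * Usage sketch (illustrative): since s_aMembers is sorted by offset (checked
 * once by the VBOX_STRICT block above), the binary search resolves a VMCPU
 * byte offset to a member name.  Assuming ENTRY stringizes the member path:
 *
 *      const char *pszName = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cr0));
 *      // expected: pszName == "cpum.GstCtx.cr0"; offsets not in the table
 *      // yield NULL, except the acThreadedFuncStats range with statistics.
 */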
8363
8364
8365DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8366{
8367 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8368#if defined(RT_ARCH_AMD64)
8369 static const char * const a_apszMarkers[] =
8370 {
8371 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8372 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8373 };
8374#endif
8375
8376 char szDisBuf[512];
8377 DISSTATE Dis;
8378 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8379 uint32_t const cNative = pTb->Native.cInstructions;
8380 uint32_t offNative = 0;
8381#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8382 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8383#endif
8384 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8385 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8386 : DISCPUMODE_64BIT;
8387#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8388 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8389#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8390 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8391#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8392# error "Port me"
8393#else
8394 csh hDisasm = ~(size_t)0;
8395# if defined(RT_ARCH_AMD64)
8396 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8397# elif defined(RT_ARCH_ARM64)
8398 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8399# else
8400# error "Port me"
8401# endif
8402 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8403
8404 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8405 //Assert(rcCs == CS_ERR_OK);
8406#endif
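
    /*
     * Note: the host instructions below are disassembled either with the
     * builtin DIS API (DISInstr + DISFormatYasmEx / DISFormatArmV8Ex) or with
     * Capstone (cs_disasm), depending on whether
     * VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER is defined at build time.
     */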
8407
8408 /*
8409 * Print TB info.
8410 */
8411 pHlp->pfnPrintf(pHlp,
8412 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8413 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8414 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8415 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8416#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8417 if (pDbgInfo && pDbgInfo->cEntries > 1)
8418 {
8419 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8420
8421 /*
8422         * This disassembly is driven by the debug info, which follows the native
8423         * code and indicates where the next guest instruction starts, where the
8424         * labels are, and other such things.
8425 */
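
        /*
         * Illustrative sketch of the debug entry stream consumed below:
         *
         *      [0] NativeOffset(0)
         *      [1] GuestInstruction(fExec)  - guest instruction starting at that native offset
         *      [2] ThreadedCall(...)        - the call(s) emitted for it
         *      [3] NativeOffset(12)         - the next annotated chunk starts at native instruction 12
         *      ...
         *
         * i.e. debug entries are only processed once offNative reaches the
         * offset announced by the most recent NativeOffset entry.
         */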
8426 uint32_t idxThreadedCall = 0;
8427 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8428 uint8_t idxRange = UINT8_MAX;
8429 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8430 uint32_t offRange = 0;
8431 uint32_t offOpcodes = 0;
8432 uint32_t const cbOpcodes = pTb->cbOpcodes;
8433 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8434 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8435 uint32_t iDbgEntry = 1;
8436 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8437
8438 while (offNative < cNative)
8439 {
8440 /* If we're at or have passed the point where the next chunk of debug
8441 info starts, process it. */
8442 if (offDbgNativeNext <= offNative)
8443 {
8444 offDbgNativeNext = UINT32_MAX;
8445 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8446 {
8447 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8448 {
8449 case kIemTbDbgEntryType_GuestInstruction:
8450 {
8451 /* Did the exec flag change? */
8452 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8453 {
8454 pHlp->pfnPrintf(pHlp,
8455 " fExec change %#08x -> %#08x %s\n",
8456 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8457 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8458 szDisBuf, sizeof(szDisBuf)));
8459 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8460 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8461 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8462 : DISCPUMODE_64BIT;
8463 }
8464
8465                        /* New opcode range? We need to fend off a spurious debug info entry here for cases
8466 where the compilation was aborted before the opcode was recorded and the actual
8467 instruction was translated to a threaded call. This may happen when we run out
8468 of ranges, or when some complicated interrupts/FFs are found to be pending or
8469 similar. So, we just deal with it here rather than in the compiler code as it
8470 is a lot simpler to do here. */
8471 if ( idxRange == UINT8_MAX
8472 || idxRange >= cRanges
8473 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8474 {
8475 idxRange += 1;
8476 if (idxRange < cRanges)
8477 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8478 else
8479 continue;
8480 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8481 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8482 + (pTb->aRanges[idxRange].idxPhysPage == 0
8483 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8484 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8485 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8486 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8487 pTb->aRanges[idxRange].idxPhysPage);
8488 GCPhysPc += offRange;
8489 }
8490
8491 /* Disassemble the instruction. */
8492 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8493 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8494 uint32_t cbInstr = 1;
8495 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8496 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8497 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8498 if (RT_SUCCESS(rc))
8499 {
8500 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8501 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8502 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8503 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8504
8505 static unsigned const s_offMarker = 55;
8506 static char const s_szMarker[] = " ; <--- guest";
8507 if (cch < s_offMarker)
8508 {
8509 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8510 cch = s_offMarker;
8511 }
8512 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8513 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8514
8515 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8516 }
8517 else
8518 {
8519 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8520 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8521 cbInstr = 1;
8522 }
8523 GCPhysPc += cbInstr;
8524 offOpcodes += cbInstr;
8525 offRange += cbInstr;
8526 continue;
8527 }
8528
8529 case kIemTbDbgEntryType_ThreadedCall:
8530 pHlp->pfnPrintf(pHlp,
8531 " Call #%u to %s (%u args) - %s\n",
8532 idxThreadedCall,
8533 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8534 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8535 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8536 idxThreadedCall++;
8537 continue;
8538
8539 case kIemTbDbgEntryType_GuestRegShadowing:
8540 {
8541 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8542 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8543 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8544 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8545 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8546 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8547 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8548 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8549 else
8550 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8551 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8552 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8553 continue;
8554 }
8555
8556#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8557 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8558 {
8559 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8560 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8561 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8562 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8563 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8564 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8565 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8566 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8567 else
8568 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8569 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8570 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8571 continue;
8572 }
8573#endif
8574
8575 case kIemTbDbgEntryType_Label:
8576 {
8577 const char *pszName = "what_the_fudge";
8578 const char *pszComment = "";
8579 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8580 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8581 {
8582 case kIemNativeLabelType_Return: pszName = "Return"; break;
8583 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8584 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8585 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8586 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8587 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8588 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8589 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8590 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8591 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8592 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8593 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8594 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8595 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8596 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8597 case kIemNativeLabelType_If:
8598 pszName = "If";
8599 fNumbered = true;
8600 break;
8601 case kIemNativeLabelType_Else:
8602 pszName = "Else";
8603 fNumbered = true;
8604 pszComment = " ; regs state restored pre-if-block";
8605 break;
8606 case kIemNativeLabelType_Endif:
8607 pszName = "Endif";
8608 fNumbered = true;
8609 break;
8610 case kIemNativeLabelType_CheckIrq:
8611 pszName = "CheckIrq_CheckVM";
8612 fNumbered = true;
8613 break;
8614 case kIemNativeLabelType_TlbLookup:
8615 pszName = "TlbLookup";
8616 fNumbered = true;
8617 break;
8618 case kIemNativeLabelType_TlbMiss:
8619 pszName = "TlbMiss";
8620 fNumbered = true;
8621 break;
8622 case kIemNativeLabelType_TlbDone:
8623 pszName = "TlbDone";
8624 fNumbered = true;
8625 break;
8626 case kIemNativeLabelType_Invalid:
8627 case kIemNativeLabelType_End:
8628 break;
8629 }
8630 if (fNumbered)
8631 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8632 else
8633 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8634 continue;
8635 }
8636
8637 case kIemTbDbgEntryType_NativeOffset:
8638 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8639 Assert(offDbgNativeNext > offNative);
8640 break;
8641
8642#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8643 case kIemTbDbgEntryType_DelayedPcUpdate:
8644 pHlp->pfnPrintf(pHlp,
8645 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8646 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8647 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8648 continue;
8649#endif
8650
8651 default:
8652 AssertFailed();
8653 }
8654 iDbgEntry++;
8655 break;
8656 }
8657 }
8658
8659 /*
8660 * Disassemble the next native instruction.
8661 */
8662 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8663# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8664 uint32_t cbInstr = sizeof(paNative[0]);
8665 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8666 if (RT_SUCCESS(rc))
8667 {
8668# if defined(RT_ARCH_AMD64)
8669 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8670 {
8671 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8672 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8673 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8674 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8675 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8676 uInfo & 0x8000 ? "recompiled" : "todo");
8677 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8678 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8679 else
8680 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8681 }
8682 else
8683# endif
8684 {
8685 const char *pszAnnotation = NULL;
8686# ifdef RT_ARCH_AMD64
8687 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8688 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8689 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8690 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8691 PCDISOPPARAM pMemOp;
8692 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8693 pMemOp = &Dis.Param1;
8694 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8695 pMemOp = &Dis.Param2;
8696 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8697 pMemOp = &Dis.Param3;
8698 else
8699 pMemOp = NULL;
8700 if ( pMemOp
8701 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8702 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8703 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8704 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8705
8706#elif defined(RT_ARCH_ARM64)
8707 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8708 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8709 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8710# else
8711# error "Port me"
8712# endif
8713 if (pszAnnotation)
8714 {
8715 static unsigned const s_offAnnotation = 55;
8716 size_t const cchAnnotation = strlen(pszAnnotation);
8717 size_t cchDis = strlen(szDisBuf);
8718 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8719 {
8720 if (cchDis < s_offAnnotation)
8721 {
8722 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8723 cchDis = s_offAnnotation;
8724 }
8725 szDisBuf[cchDis++] = ' ';
8726 szDisBuf[cchDis++] = ';';
8727 szDisBuf[cchDis++] = ' ';
8728 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8729 }
8730 }
8731 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8732 }
8733 }
8734 else
8735 {
8736# if defined(RT_ARCH_AMD64)
8737 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8738 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8739# elif defined(RT_ARCH_ARM64)
8740 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8741# else
8742# error "Port me"
8743# endif
8744 cbInstr = sizeof(paNative[0]);
8745 }
8746 offNative += cbInstr / sizeof(paNative[0]);
8747
8748# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8749 cs_insn *pInstr;
8750 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8751 (uintptr_t)pNativeCur, 1, &pInstr);
8752 if (cInstrs > 0)
8753 {
8754 Assert(cInstrs == 1);
8755 const char *pszAnnotation = NULL;
8756# if defined(RT_ARCH_ARM64)
8757 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8758 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8759 {
8760                /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8761 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8762 char *psz = strchr(pInstr->op_str, '[');
8763 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8764 {
8765                    uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8766 int32_t off = -1;
8767 psz += 4;
8768 if (*psz == ']')
8769 off = 0;
8770 else if (*psz == ',')
8771 {
8772 psz = RTStrStripL(psz + 1);
8773 if (*psz == '#')
8774 off = RTStrToInt32(&psz[1]);
8775 /** @todo deal with index registers and LSL as well... */
8776 }
8777 if (off >= 0)
8778 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8779 }
8780 }
8781# endif
8782
8783 size_t const cchOp = strlen(pInstr->op_str);
8784# if defined(RT_ARCH_AMD64)
8785 if (pszAnnotation)
8786 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8787 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8788 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8789 else
8790 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8791 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8792
8793# else
8794 if (pszAnnotation)
8795 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8796 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8797 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8798 else
8799 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8800 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8801# endif
8802 offNative += pInstr->size / sizeof(*pNativeCur);
8803 cs_free(pInstr, cInstrs);
8804 }
8805 else
8806 {
8807# if defined(RT_ARCH_AMD64)
8808 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8809                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8810# else
8811 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8812# endif
8813 offNative++;
8814 }
8815# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8816 }
8817 }
8818 else
8819#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8820 {
8821 /*
8822 * No debug info, just disassemble the x86 code and then the native code.
8823 *
8824 * First the guest code:
8825 */
8826 for (unsigned i = 0; i < pTb->cRanges; i++)
8827 {
8828 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8829 + (pTb->aRanges[i].idxPhysPage == 0
8830 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8831 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8832 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8833 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8834 unsigned off = pTb->aRanges[i].offOpcodes;
8835 /** @todo this ain't working when crossing pages! */
8836 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8837 while (off < cbOpcodes)
8838 {
8839 uint32_t cbInstr = 1;
8840 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8841 &pTb->pabOpcodes[off], cbOpcodes - off,
8842 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8843 if (RT_SUCCESS(rc))
8844 {
8845 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8846 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8847 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8848 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8849 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8850 GCPhysPc += cbInstr;
8851 off += cbInstr;
8852 }
8853 else
8854 {
8855 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8856 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8857 break;
8858 }
8859 }
8860 }
8861
8862 /*
8863 * Then the native code:
8864 */
8865 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8866 while (offNative < cNative)
8867 {
8868 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8869# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8870 uint32_t cbInstr = sizeof(paNative[0]);
8871 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8872 if (RT_SUCCESS(rc))
8873 {
8874# if defined(RT_ARCH_AMD64)
8875 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8876 {
8877 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8878 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8879 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8880 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8881 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8882 uInfo & 0x8000 ? "recompiled" : "todo");
8883 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8884 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8885 else
8886 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8887 }
8888 else
8889# endif
8890 {
8891# ifdef RT_ARCH_AMD64
8892 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8893 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8894 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8895 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8896# elif defined(RT_ARCH_ARM64)
8897 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8898 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8899 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8900# else
8901# error "Port me"
8902# endif
8903 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8904 }
8905 }
8906 else
8907 {
8908# if defined(RT_ARCH_AMD64)
8909 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8910 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8911# else
8912 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8913# endif
8914 cbInstr = sizeof(paNative[0]);
8915 }
8916 offNative += cbInstr / sizeof(paNative[0]);
8917
8918# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8919 cs_insn *pInstr;
8920 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8921 (uintptr_t)pNativeCur, 1, &pInstr);
8922 if (cInstrs > 0)
8923 {
8924 Assert(cInstrs == 1);
8925# if defined(RT_ARCH_AMD64)
8926 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8927 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8928# else
8929 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8930 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8931# endif
8932 offNative += pInstr->size / sizeof(*pNativeCur);
8933 cs_free(pInstr, cInstrs);
8934 }
8935 else
8936 {
8937# if defined(RT_ARCH_AMD64)
8938 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8939                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8940# else
8941 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8942# endif
8943 offNative++;
8944 }
8945# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8946 }
8947 }
8948
8949#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8950 /* Cleanup. */
8951 cs_close(&hDisasm);
8952#endif
8953}
8954
8955
8956/**
8957 * Recompiles the given threaded TB into a native one.
8958 *
8959 * In case of failure the translation block will be returned as-is.
8960 *
8961 * @returns pTb.
8962 * @param pVCpu The cross context virtual CPU structure of the calling
8963 * thread.
8964 * @param pTb The threaded translation to recompile to native.
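 *
 * Usage sketch (illustrative):
 *      pTb = iemNativeRecompile(pVCpu, pTb);
 * The caller can keep using the returned pointer either way, as on failure
 * the original threaded TB is returned unchanged.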
8965 */
8966DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
8967{
8968 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
8969
8970 /*
8971     * The first time through, we allocate the recompiler state; the other times
8972 * we just need to reset it before using it again.
8973 */
8974 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
8975 if (RT_LIKELY(pReNative))
8976 iemNativeReInit(pReNative, pTb);
8977 else
8978 {
8979 pReNative = iemNativeInit(pVCpu, pTb);
8980 AssertReturn(pReNative, pTb);
8981 }
8982
8983#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8984 /*
8985 * First do liveness analysis. This is done backwards.
8986 */
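
    /*
     * Sketch of the backward pass below: the final entry [cCalls - 1] starts
     * out as all-unused, then for each call the per-function liveness handler
     * (when available) derives entry [i - 1] from entry [i]; without a handler
     * the conservative exception/call initializer is used instead.
     */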
8987 {
8988 uint32_t idxCall = pTb->Thrd.cCalls;
8989 if (idxCall <= pReNative->cLivenessEntriesAlloc)
8990 { /* likely */ }
8991 else
8992 {
8993 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
8994 while (idxCall > cAlloc)
8995 cAlloc *= 2;
8996 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
8997 AssertReturn(pvNew, pTb);
8998 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
8999 pReNative->cLivenessEntriesAlloc = cAlloc;
9000 }
9001 AssertReturn(idxCall > 0, pTb);
9002 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9003
9004 /* The initial (final) entry. */
9005 idxCall--;
9006 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9007
9008        /* Loop backwards through the calls and fill in the other entries. */
9009 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9010 while (idxCall > 0)
9011 {
9012 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9013 if (pfnLiveness)
9014 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9015 else
9016 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9017 pCallEntry--;
9018 idxCall--;
9019 }
9020
9021# ifdef VBOX_WITH_STATISTICS
9022        /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9023           to 'clobbered' rather than 'input'. */
9024 /** @todo */
9025# endif
9026 }
9027#endif
9028
9029 /*
9030 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9031 * for aborting if an error happens.
9032 */
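
    /*
     * Control-flow sketch: IEMNATIVE_TRY_SETJMP below opens the protected
     * region; any emitter failure longjmps/throws a VBox status code, which
     * lands in the IEMNATIVE_CATCH_LONGJMP_BEGIN/END block further down,
     * where the status is logged and the original threaded pTb is returned
     * unmodified.
     */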
9033 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9034#ifdef LOG_ENABLED
9035 uint32_t const cCallsOrg = cCallsLeft;
9036#endif
9037 uint32_t off = 0;
9038 int rc = VINF_SUCCESS;
9039 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9040 {
9041 /*
9042 * Emit prolog code (fixed).
9043 */
9044 off = iemNativeEmitProlog(pReNative, off);
9045
9046 /*
9047 * Convert the calls to native code.
9048 */
9049#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9050 int32_t iGstInstr = -1;
9051#endif
9052#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9053 uint32_t cThreadedCalls = 0;
9054 uint32_t cRecompiledCalls = 0;
9055#endif
9056#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9057 uint32_t idxCurCall = 0;
9058#endif
9059 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9060 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9061 while (cCallsLeft-- > 0)
9062 {
9063 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9064#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9065 pReNative->idxCurCall = idxCurCall;
9066#endif
9067
9068 /*
9069 * Debug info, assembly markup and statistics.
9070 */
9071#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9072 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9073 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9074#endif
9075#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9076 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9077 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9078 {
9079 if (iGstInstr < (int32_t)pTb->cInstructions)
9080 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9081 else
9082 Assert(iGstInstr == pTb->cInstructions);
9083 iGstInstr = pCallEntry->idxInstr;
9084 }
9085 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9086#endif
9087#if defined(VBOX_STRICT)
9088 off = iemNativeEmitMarker(pReNative, off,
9089 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9090#endif
9091#if defined(VBOX_STRICT)
9092 iemNativeRegAssertSanity(pReNative);
9093#endif
9094#ifdef VBOX_WITH_STATISTICS
9095 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9096#endif
9097
9098 /*
9099 * Actual work.
9100 */
9101 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9102 pfnRecom ? "(recompiled)" : "(todo)"));
9103 if (pfnRecom) /** @todo stats on this. */
9104 {
9105 off = pfnRecom(pReNative, off, pCallEntry);
9106 STAM_REL_STATS({cRecompiledCalls++;});
9107 }
9108 else
9109 {
9110 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9111 STAM_REL_STATS({cThreadedCalls++;});
9112 }
9113 Assert(off <= pReNative->cInstrBufAlloc);
9114 Assert(pReNative->cCondDepth == 0);
9115
9116#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9117 if (LogIs2Enabled())
9118 {
9119 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9120# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9121 static const char s_achState[] = "CUXI";
9122# else
9123 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9124# endif
9125
9126 char szGpr[17];
9127 for (unsigned i = 0; i < 16; i++)
9128 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9129 szGpr[16] = '\0';
9130
9131 char szSegBase[X86_SREG_COUNT + 1];
9132 char szSegLimit[X86_SREG_COUNT + 1];
9133 char szSegAttrib[X86_SREG_COUNT + 1];
9134 char szSegSel[X86_SREG_COUNT + 1];
9135 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9136 {
9137 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9138 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9139 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9140 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9141 }
9142 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9143 = szSegSel[X86_SREG_COUNT] = '\0';
9144
9145 char szEFlags[8];
9146 for (unsigned i = 0; i < 7; i++)
9147 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9148 szEFlags[7] = '\0';
9149
9150 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9151 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9152 }
9153#endif
9154
9155 /*
9156 * Advance.
9157 */
9158 pCallEntry++;
9159#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9160 idxCurCall++;
9161#endif
9162 }
9163
9164 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9165 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9166 if (!cThreadedCalls)
9167 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9168
9169 /*
9170 * Emit the epilog code.
9171 */
9172 uint32_t idxReturnLabel;
9173 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9174
9175 /*
9176 * Generate special jump labels.
9177 */
9178 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9179 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9180 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9181 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9182
9183 /*
9184         * Generate simple TB tail labels that just call a helper with a pVCpu
9185         * arg and either return or longjmp/throw a non-zero status.
9186 *
9187 * The array entries must be ordered by enmLabel value so we can index
9188 * using fTailLabels bit numbers.
9189 */
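
        /*
         * Shape of each stub emitted below (illustrative pseudo-assembly):
         *
         *      SomeTailLabel:
         *          mov     arg0, pVCpu    ; IEMNATIVE_CALL_ARG0_GREG <- IEMNATIVE_REG_FIXED_PVMCPU
         *          call    pfnCallback    ; e.g. iemNativeHlpExecRaiseGp0
         *          jmp     ReturnLabel    ; idxReturnLabel from the epilog above
         */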
9190 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9191 static struct
9192 {
9193 IEMNATIVELABELTYPE enmLabel;
9194 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9195 } const g_aSimpleTailLabels[] =
9196 {
9197 { kIemNativeLabelType_Invalid, NULL },
9198 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9199 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9200 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9201 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9202 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9203 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9204 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9205 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9206 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9207 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9208 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9209 };
9210 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9211 AssertCompile(kIemNativeLabelType_Invalid == 0);
9212 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9213 if (fTailLabels)
9214 {
9215 do
9216 {
9217 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9218 fTailLabels &= ~RT_BIT_64(enmLabel);
9219 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9220
9221 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9222 Assert(idxLabel != UINT32_MAX);
9223 if (idxLabel != UINT32_MAX)
9224 {
9225 iemNativeLabelDefine(pReNative, idxLabel, off);
9226
9227 /* int pfnCallback(PVMCPUCC pVCpu) */
9228 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9229 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9230
9231 /* jump back to the return sequence. */
9232 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9233 }
9234
9235 } while (fTailLabels);
9236 }
9237 }
9238 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9239 {
9240 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9241 return pTb;
9242 }
9243 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9244 Assert(off <= pReNative->cInstrBufAlloc);
9245
9246 /*
9247     * Make sure all labels have been defined.
9248 */
9249 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9250#ifdef VBOX_STRICT
9251 uint32_t const cLabels = pReNative->cLabels;
9252 for (uint32_t i = 0; i < cLabels; i++)
9253 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9254#endif
9255
9256 /*
9257 * Allocate executable memory, copy over the code we've generated.
9258 */
9259 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9260 if (pTbAllocator->pDelayedFreeHead)
9261 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9262
9263 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9264 AssertReturn(paFinalInstrBuf, pTb);
9265 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9266
9267 /*
9268 * Apply fixups.
9269 */
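
    /*
     * Worked example for the ARM64 RelImm26At0 case below (illustrative): a
     * fixup recorded at native instruction 100 whose label got defined at
     * instruction 40 (offAddend 0) gives offDisp = 40 - 100 = -60 instruction
     * units, and (uint32_t)-60 & 0x03ffffff = 0x03ffffc4 is merged into the
     * low 26 bits of the already-emitted branch instruction.
     */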
9270 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9271 uint32_t const cFixups = pReNative->cFixups;
9272 for (uint32_t i = 0; i < cFixups; i++)
9273 {
9274 Assert(paFixups[i].off < off);
9275 Assert(paFixups[i].idxLabel < cLabels);
9276 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9277 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9278 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9279 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9280 switch (paFixups[i].enmType)
9281 {
9282#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9283 case kIemNativeFixupType_Rel32:
9284 Assert(paFixups[i].off + 4 <= off);
9285 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9286 continue;
9287
9288#elif defined(RT_ARCH_ARM64)
9289 case kIemNativeFixupType_RelImm26At0:
9290 {
9291 Assert(paFixups[i].off < off);
9292 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9293 Assert(offDisp >= -262144 && offDisp < 262144);
9294 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9295 continue;
9296 }
9297
9298 case kIemNativeFixupType_RelImm19At5:
9299 {
9300 Assert(paFixups[i].off < off);
9301 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9302 Assert(offDisp >= -262144 && offDisp < 262144);
9303 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9304 continue;
9305 }
9306
9307 case kIemNativeFixupType_RelImm14At5:
9308 {
9309 Assert(paFixups[i].off < off);
9310 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9311 Assert(offDisp >= -8192 && offDisp < 8192);
9312 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9313 continue;
9314 }
9315
9316#endif
9317 case kIemNativeFixupType_Invalid:
9318 case kIemNativeFixupType_End:
9319 break;
9320 }
9321 AssertFailed();
9322 }
9323
9324 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9325 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9326
9327 /*
9328 * Convert the translation block.
9329 */
9330 RTMemFree(pTb->Thrd.paCalls);
9331 pTb->Native.paInstructions = paFinalInstrBuf;
9332 pTb->Native.cInstructions = off;
9333 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9334#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9335    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9336 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9337#endif
9338
9339 Assert(pTbAllocator->cThreadedTbs > 0);
9340 pTbAllocator->cThreadedTbs -= 1;
9341 pTbAllocator->cNativeTbs += 1;
9342 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9343
9344#ifdef LOG_ENABLED
9345 /*
9346 * Disassemble to the log if enabled.
9347 */
9348 if (LogIs3Enabled())
9349 {
9350 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9351 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9352# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9353 RTLogFlush(NULL);
9354# endif
9355 }
9356#endif
9357 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9358
9359 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9360 return pTb;
9361}
9362