VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103899

Last change on this file since 103899 was 103895, checked in by vboxsync, 13 months ago

VMM/IEM: Add SIMD local variable support and implement native emitters for IEM_MC_FETCH_YREG_U256() and IEM_MC_STORE_YREG_U256_ZX_VLMAX(), bugref:10614 [build fix]

1/* $Id: IEMAllN8veRecompiler.cpp 103895 2024-03-18 13:55:40Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation restricts page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
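/*
 * Editor's note: illustrative sketch only, not part of the upstream sources;
 * the helper name is made up.  It spells out how a request size maps onto the
 * 128-byte sub-allocation units and the per-chunk allocation bitmap that the
 * two macros above configure.
 */
#if 0 /* illustrative only, not compiled */
/** Rounds a byte request up to whole sub-allocation units (same formula as
 *  iemExecMemAllocatorAllocInChunk uses below). */
static uint32_t iemExecMemSketchReqToUnits(uint32_t cbReq)
{
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
/* Example: a 200 byte request needs iemExecMemSketchReqToUnits(200) = 2 units
   (256 bytes); a 64 MiB chunk holds 64M / 128 = 524288 units, tracked by
   524288 / 64 = 8192 uint64_t words in the allocation bitmap. */
#endif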
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity (read: laziness), they are all allocated as
339 * one continuous block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
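/*
 * Editor's note: illustrative sketch only, not part of the upstream sources;
 * the helper name and the memcpy-style "emitter" are made up.  It shows the
 * write-then-execute lifecycle that the darwin remark above expects callers
 * to follow.
 */
#if 0 /* illustrative only, not compiled */
static void iemExecMemSketchEmitAndPublish(PVMCPUCC pVCpu, uint8_t const *pbSrc, size_t cb)
{
    /* 1. Allocate; on darwin the returned pages are read+write, not executable. */
    uint8_t *pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cb);
    if (pbDst)
    {
        /* 2. Emit/copy the native code while the pages are still writable. */
        memcpy(pbDst, pbSrc, cb);
        /* 3. Flip the pages to read+exec and invalidate the instruction cache
              before the code is executed (see iemExecMemAllocatorReadyForUse). */
        iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cb);
    }
}
#endif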
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits of consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
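/*
 * Editor's note: illustrative, portable restatement of the first-fit scan
 * performed by iemExecMemAllocatorAllocInChunkInt above, without the ASMBit*
 * primitives.  Not part of the upstream sources; the helper name is made up.
 */
#if 0 /* illustrative only, not compiled */
static uint32_t iemExecMemSketchFindFreeRun(uint64_t const *pbm, uint32_t cBits, uint32_t cReqUnits)
{
    uint32_t cRun = 0;
    for (uint32_t iBit = 0; iBit < cBits; iBit++)
    {
        if (pbm[iBit / 64] & RT_BIT_64(iBit % 64))
            cRun = 0;                       /* a set bit (allocated unit) breaks the run */
        else if (++cRun >= cReqUnits)
            return iBit - cReqUnits + 1;    /* index of the first bit in the free run */
    }
    return UINT32_MAX;                      /* no run of cReqUnits clear bits found */
}
#endif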
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
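/*
 * Editor's note (summary, not part of the upstream sources): the fallback
 * order implemented above is (1) try the hinted chunk, then the remaining
 * chunks wrapping around to index 0, (2) grow by one chunk and retry while
 * cChunks < cMaxChunks, (3) after the first failed iteration prune native TBs
 * via iemTbAllocatorFreeupNativeSpace() and loop once more before giving up.
 */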
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
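/*
 * Editor's note (not part of the upstream sources): the alternative
 * sub-allocator keeps no per-block header, so callers must pass the same cb
 * to iemExecMemAllocatorFree() that they requested from
 * iemExecMemAllocatorAlloc(); both paths round it up identically and the free
 * path re-derives the unit count from it to clear the matching bitmap range.
 */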
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! We use a frame register approach here, partly because we
695 * have one, but mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 0x10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
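/*
 * Editor's note (not part of the upstream sources): once registered, any RIP
 * inside the chunk resolves to the single RUNTIME_FUNCTION entry above, so
 * the Windows unwinder and debuggers can walk from recompiled code back out
 * through the fixed prologue layout described by s_aOpcodes.
 */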
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
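/*
 * Editor's note: illustrative self-check, not part of the upstream sources;
 * the function name is made up.  It spells out two encodings produced by the
 * emitters above, worked out by hand from their code.
 */
# if 0 /* illustrative only, not compiled */
static void iemDwarfSketchSelfTest(void)
{
    uint8_t    abBuf[8];
    RTPTRUNION Ptr = { abBuf };
    Ptr = iemDwarfPutLeb128(Ptr, -8);           /* SLEB128(-8)  -> single byte 0x78 */
    Assert(Ptr.pb == &abBuf[1] && abBuf[0] == 0x78);
    Ptr.pb = abBuf;
    Ptr = iemDwarfPutUleb128(Ptr, 300);         /* ULEB128(300) -> 0xac 0x02 */
    Assert(Ptr.pb == &abBuf[2] && abBuf[0] == 0xac && abBuf[1] == 0x02);
}
# endif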
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
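/*
 * Editor's note (worked example, not part of the upstream sources): with the
 * data alignment factor of -8 emitted in the CIE below, a call such as
 * iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) produces the compact two byte
 * form (DW_CFA_offset | uReg) followed by ULEB128(2), which the unwinder
 * reads as "RBP was saved at CFA + 2 * -8", i.e. 16 bytes below the CFA.
 */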
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
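/*
 * Editor's note (worked example, not part of the upstream sources): sh_name
 * and st_name values are byte offsets into szzStrTab, which the two macros
 * above fill with consecutive NUL terminated strings.  With the appends below
 * it begins as: offset 0 = "", offset 1 = ".eh_frame", offset 11 = ".shstrtab",
 * offset 21 = ".symtab", and so on; offStrTab always points at the next free
 * byte.
 */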
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1335 * We want the memory to be aligned on 64 bytes, so the first time through
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1342 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1346 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
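    /* One allocation bit per sub-allocation unit; the additional '+ 3' in the shift converts bits to bytes. */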
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadedFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695 /* We set fSafeToFree to false because we're being called in the context
1696 of a TB callback function, which for native TBs means we cannot release
1697 the executable memory until we've returned all the way back to iemTbExec,
1698 as that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
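    /* The cast chain sign-extends the 8-bit value to 16 bits and then zero-extends it to
       64 bits, so only the low 16 bits of the returned value can be non-zero; the other
       _Sx_ helpers below follow the same pattern for their respective widths. */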
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873/**
1874 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1875 */
1876IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1877{
1878#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1879 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1880#else
1881 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1882#endif
1883}
1884
1885
1886/**
1887 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1888 */
1889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1890{
1891#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1892 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1893#else
1894 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1895#endif
1896}
1897
1898
1899/**
1900 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1901 */
1902IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1903{
1904#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1905 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1906#else
1907 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1908#endif
1909}
1910
1911
1912/**
1913 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1914 */
1915IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1916{
1917#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1918 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1919#else
1920 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1921#endif
1922}
1923
1924
1925
1926/**
1927 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1932 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1933#else
1934 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1935#endif
1936}
1937
1938
1939/**
1940 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1941 */
1942IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1943{
1944#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1945 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1946#else
1947 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1948#endif
1949}
1950
1951
1952/**
1953 * Used by TB code to store a 32-bit selector value onto a generic stack.
1954 *
1955 * Intel CPUs don't write a whole dword, thus the special function.
1956 */
1957IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1958{
1959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1960 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1961#else
1962 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1963#endif
1964}
1965
1966
1967/**
1968 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1969 */
1970IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1971{
1972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1973 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1974#else
1975 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1976#endif
1977}
1978
1979
1980/**
1981 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1982 */
1983IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1984{
1985#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1986 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1987#else
1988 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1989#endif
1990}
1991
1992
1993/**
1994 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1999 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2000#else
2001 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2008 */
2009IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2010{
2011#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2012 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2013#else
2014 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2015#endif
2016}
2017
2018
2019
2020/*********************************************************************************************************************************
2021* Helpers: Flat memory fetches and stores. *
2022*********************************************************************************************************************************/
2023
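/* Note: when the IEMNATIVE_WITH_TLB_LOOKUP_* variants are used, the flat helpers below reuse
   the segmented 'Safe' workers and pass UINT8_MAX as the segment register index (flat address). */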
2024/**
2025 * Used by TB code to load unsigned 8-bit data w/ flat address.
2026 * @note Zero extending the value to 64-bit to simplify assembly.
2027 */
2028IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2029{
2030#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2031 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2032#else
2033 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2034#endif
2035}
2036
2037
2038/**
2039 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2040 * to 16 bits.
2041 * @note Zero extending the value to 64-bit to simplify assembly.
2042 */
2043IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2044{
2045#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2046 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2047#else
2048 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2049#endif
2050}
2051
2052
2053/**
2054 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2055 * to 32 bits.
2056 * @note Zero extending the value to 64-bit to simplify assembly.
2057 */
2058IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2059{
2060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2061 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2062#else
2063 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2064#endif
2065}
2066
2067
2068/**
2069 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2070 * to 64 bits.
2071 */
2072IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2073{
2074#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2075 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2076#else
2077 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2078#endif
2079}
2080
2081
2082/**
2083 * Used by TB code to load unsigned 16-bit data w/ flat address.
2084 * @note Zero extending the value to 64-bit to simplify assembly.
2085 */
2086IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2087{
2088#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2089 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2090#else
2091 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2092#endif
2093}
2094
2095
2096/**
2097 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2098 * to 32 bits.
2099 * @note Zero extending the value to 64-bit to simplify assembly.
2100 */
2101IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2102{
2103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2104 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2105#else
2106 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2107#endif
2108}
2109
2110
2111/**
2112 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2113 * to 64 bits.
2114 * @note Zero extending the value to 64-bit to simplify assembly.
2115 */
2116IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2117{
2118#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2119 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2120#else
2121 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2122#endif
2123}
2124
2125
2126/**
2127 * Used by TB code to load unsigned 32-bit data w/ flat address.
2128 * @note Zero extending the value to 64-bit to simplify assembly.
2129 */
2130IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2131{
2132#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2133 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2134#else
2135 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2136#endif
2137}
2138
2139
2140/**
2141 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2142 * to 64 bits.
2143 * @note Zero extending the value to 64-bit to simplify assembly.
2144 */
2145IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2146{
2147#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2148 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2149#else
2150 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2151#endif
2152}
2153
2154
2155/**
2156 * Used by TB code to load unsigned 64-bit data w/ flat address.
2157 */
2158IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2159{
2160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2161 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2162#else
2163 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2164#endif
2165}
2166
2167
2168/**
2169 * Used by TB code to store unsigned 8-bit data w/ flat address.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2172{
2173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2174 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2175#else
2176 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2177#endif
2178}
2179
2180
2181/**
2182 * Used by TB code to store unsigned 16-bit data w/ flat address.
2183 */
2184IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2185{
2186#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2187 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2188#else
2189 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2190#endif
2191}
2192
2193
2194/**
2195 * Used by TB code to store unsigned 32-bit data w/ flat address.
2196 */
2197IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2198{
2199#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2200 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2201#else
2202 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2203#endif
2204}
2205
2206
2207/**
2208 * Used by TB code to store unsigned 64-bit data w/ flat address.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2213 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2214#else
2215 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2216#endif
2217}
2218
2219
2220
2221/**
2222 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2227 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2228#else
2229 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2236 */
2237IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2238{
2239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2240 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2241#else
2242 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2243#endif
2244}
2245
2246
2247/**
2248 * Used by TB code to store a segment selector value onto a flat stack.
2249 *
2250 * Intel CPUs don't write a whole dword, thus the special function.
2251 */
2252IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2253{
2254#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2255 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2256#else
2257 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2258#endif
2259}
2260
2261
2262/**
2263 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2264 */
2265IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2266{
2267#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2268 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2269#else
2270 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2271#endif
2272}
2273
2274
2275/**
2276 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2277 */
2278IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2279{
2280#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2281 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2282#else
2283 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2284#endif
2285}
2286
2287
2288/**
2289 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2290 */
2291IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2292{
2293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2294 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2295#else
2296 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2297#endif
2298}
2299
2300
2301/**
2302 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2303 */
2304IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2305{
2306#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2307 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2308#else
2309 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2310#endif
2311}
2312
2313
2314
2315/*********************************************************************************************************************************
2316* Helpers: Segmented memory mapping. *
2317*********************************************************************************************************************************/
2318
2319/**
2320 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2321 * segmentation.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2324 RTGCPTR GCPtrMem, uint8_t iSegReg))
2325{
2326#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2327 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2328#else
2329 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2330#endif
2331}
2332
2333
2334/**
2335 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2336 */
2337IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2338 RTGCPTR GCPtrMem, uint8_t iSegReg))
2339{
2340#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2341 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2342#else
2343 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2344#endif
2345}
2346
2347
2348/**
2349 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2350 */
2351IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2352 RTGCPTR GCPtrMem, uint8_t iSegReg))
2353{
2354#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2355 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2356#else
2357 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2358#endif
2359}
2360
2361
2362/**
2363 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2364 */
2365IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2366 RTGCPTR GCPtrMem, uint8_t iSegReg))
2367{
2368#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2369 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2370#else
2371 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2372#endif
2373}
2374
2375
2376/**
2377 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2378 * segmentation.
2379 */
2380IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2381 RTGCPTR GCPtrMem, uint8_t iSegReg))
2382{
2383#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2384 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2385#else
2386 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2387#endif
2388}
2389
2390
2391/**
2392 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2393 */
2394IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2395 RTGCPTR GCPtrMem, uint8_t iSegReg))
2396{
2397#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2398 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2399#else
2400 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2401#endif
2402}
2403
2404
2405/**
2406 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2407 */
2408IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2409 RTGCPTR GCPtrMem, uint8_t iSegReg))
2410{
2411#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2412 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2413#else
2414 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2415#endif
2416}
2417
2418
2419/**
2420 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2421 */
2422IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2423 RTGCPTR GCPtrMem, uint8_t iSegReg))
2424{
2425#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2426 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2427#else
2428 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2429#endif
2430}
2431
2432
2433/**
2434 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2435 * segmentation.
2436 */
2437IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2438 RTGCPTR GCPtrMem, uint8_t iSegReg))
2439{
2440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2441 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2442#else
2443 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2444#endif
2445}
2446
2447
2448/**
2449 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2450 */
2451IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2452 RTGCPTR GCPtrMem, uint8_t iSegReg))
2453{
2454#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2455 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2456#else
2457 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2458#endif
2459}
2460
2461
2462/**
2463 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2464 */
2465IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2466 RTGCPTR GCPtrMem, uint8_t iSegReg))
2467{
2468#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2469 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2470#else
2471 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2472#endif
2473}
2474
2475
2476/**
2477 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2478 */
2479IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2480 RTGCPTR GCPtrMem, uint8_t iSegReg))
2481{
2482#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2483 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2484#else
2485 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2486#endif
2487}
2488
2489
2490/**
2491 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2492 * segmentation.
2493 */
2494IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2495 RTGCPTR GCPtrMem, uint8_t iSegReg))
2496{
2497#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2498 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2499#else
2500 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2501#endif
2502}
2503
2504
2505/**
2506 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2507 */
2508IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2509 RTGCPTR GCPtrMem, uint8_t iSegReg))
2510{
2511#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2512 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2513#else
2514 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2515#endif
2516}
2517
2518
2519/**
2520 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2521 */
2522IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2523 RTGCPTR GCPtrMem, uint8_t iSegReg))
2524{
2525#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2526 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2527#else
2528 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2529#endif
2530}
2531
2532
2533/**
2534 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2535 */
2536IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2537 RTGCPTR GCPtrMem, uint8_t iSegReg))
2538{
2539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2540 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2541#else
2542 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2543#endif
2544}
2545
2546
2547/**
2548 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2551 RTGCPTR GCPtrMem, uint8_t iSegReg))
2552{
2553#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2554 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2555#else
2556 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2557#endif
2558}
2559
2560
2561/**
2562 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2563 */
2564IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2565 RTGCPTR GCPtrMem, uint8_t iSegReg))
2566{
2567#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2568 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2569#else
2570 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2571#endif
2572}
2573
2574
2575/**
2576 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2577 * segmentation.
2578 */
2579IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2580 RTGCPTR GCPtrMem, uint8_t iSegReg))
2581{
2582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2583 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2584#else
2585 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2586#endif
2587}
2588
2589
2590/**
2591 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2592 */
2593IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2594 RTGCPTR GCPtrMem, uint8_t iSegReg))
2595{
2596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2597 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2598#else
2599 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2600#endif
2601}
2602
2603
2604/**
2605 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2606 */
2607IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2608 RTGCPTR GCPtrMem, uint8_t iSegReg))
2609{
2610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2611 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2612#else
2613 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2614#endif
2615}
2616
2617
2618/**
2619 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2620 */
2621IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2622 RTGCPTR GCPtrMem, uint8_t iSegReg))
2623{
2624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2625 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2626#else
2627 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2628#endif
2629}
2630
2631
2632/*********************************************************************************************************************************
2633* Helpers: Flat memory mapping. *
2634*********************************************************************************************************************************/
2635
2636/**
2637 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2638 * address.
2639 */
2640IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2644#else
2645 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2654{
2655#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2656 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2657#else
2658 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2659#endif
2660}
2661
2662
2663/**
2664 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2665 */
2666IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2667{
2668#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2669 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2670#else
2671 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2672#endif
2673}
2674
2675
2676/**
2677 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2678 */
2679IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2680{
2681#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2682 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2683#else
2684 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2685#endif
2686}
2687
2688
2689/**
2690 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2691 * address.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2694{
2695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2696 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2697#else
2698 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2699#endif
2700}
2701
2702
2703/**
2704 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2705 */
2706IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2707{
2708#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2709 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2710#else
2711 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2712#endif
2713}
2714
2715
2716/**
2717 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2718 */
2719IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2720{
2721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2722 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2723#else
2724 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2725#endif
2726}
2727
2728
2729/**
2730 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2731 */
2732IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2733{
2734#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2735 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2736#else
2737 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2738#endif
2739}
2740
2741
2742/**
2743 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2744 * address.
2745 */
2746IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2747{
2748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2749 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2750#else
2751 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2752#endif
2753}
2754
2755
2756/**
2757 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2758 */
2759IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2760{
2761#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2762 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2763#else
2764 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2765#endif
2766}
2767
2768
2769/**
2770 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2771 */
2772IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2773{
2774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2775 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2776#else
2777 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2778#endif
2779}
2780
2781
2782/**
2783 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2784 */
2785IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2786{
2787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2788 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2789#else
2790 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2791#endif
2792}
2793
2794
2795/**
2796 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2797 * address.
2798 */
2799IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2800{
2801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2802 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2803#else
2804 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2805#endif
2806}
2807
2808
2809/**
2810 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2811 */
2812IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2813{
2814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2815 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2816#else
2817 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2818#endif
2819}
2820
2821
2822/**
2823 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2824 */
2825IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2826{
2827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2828 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2829#else
2830 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2831#endif
2832}
2833
2834
2835/**
2836 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2839{
2840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2841 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2842#else
2843 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2844#endif
2845}
2846
2847
2848/**
2849 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2850 */
2851IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2852{
2853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2854 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2855#else
2856 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2857#endif
2858}
2859
2860
2861/**
2862 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2863 */
2864IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2865{
2866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2867 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2868#else
2869 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2870#endif
2871}
2872
2873
2874/**
2875 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2876 * address.
2877 */
2878IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2879{
2880#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2881 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2882#else
2883 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2884#endif
2885}
2886
2887
2888/**
2889 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2890 */
2891IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2892{
2893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2894 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2895#else
2896 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2897#endif
2898}
2899
2900
2901/**
2902 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2903 */
2904IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2905{
2906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2907 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2908#else
2909 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2910#endif
2911}
2912
2913
2914/**
2915 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2916 */
2917IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2918{
2919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2920 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2921#else
2922 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2923#endif
2924}
2925
2926
2927/*********************************************************************************************************************************
2928* Helpers: Commit, rollback & unmap *
2929*********************************************************************************************************************************/
2930
2931/**
2932 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2933 */
2934IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2935{
2936 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2937}
2938
2939
2940/**
2941 * Used by TB code to commit and unmap a read-write memory mapping.
2942 */
2943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2944{
2945 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2946}
2947
2948
2949/**
2950 * Used by TB code to commit and unmap a write-only memory mapping.
2951 */
2952IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2953{
2954 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2955}
2956
2957
2958/**
2959 * Used by TB code to commit and unmap a read-only memory mapping.
2960 */
2961IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2962{
2963 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2964}
2965
2966
2967/**
2968 * Reinitializes the native recompiler state.
2969 *
2970 * Called before starting a new recompile job.
2971 */
2972static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2973{
2974 pReNative->cLabels = 0;
2975 pReNative->bmLabelTypes = 0;
2976 pReNative->cFixups = 0;
2977#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2978 pReNative->pDbgInfo->cEntries = 0;
2979#endif
2980 pReNative->pTbOrg = pTb;
2981 pReNative->cCondDepth = 0;
2982 pReNative->uCondSeqNo = 0;
2983 pReNative->uCheckIrqSeqNo = 0;
2984 pReNative->uTlbSeqNo = 0;
2985
2986#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2987 pReNative->Core.offPc = 0;
2988 pReNative->Core.cInstrPcUpdateSkipped = 0;
2989#endif
2990#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2991 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2992#endif
2993 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2994#if IEMNATIVE_HST_GREG_COUNT < 32
2995 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2996#endif
2997 ;
2998 pReNative->Core.bmHstRegsWithGstShadow = 0;
2999 pReNative->Core.bmGstRegShadows = 0;
3000 pReNative->Core.bmVars = 0;
3001 pReNative->Core.bmStack = 0;
3002 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3003 pReNative->Core.u64ArgVars = UINT64_MAX;
3004
3005 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3006 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3007 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3008 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3009 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3010 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3011 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3012 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3013 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3014 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3015 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3016 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3017 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3018 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3019 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3020 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3021 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3022
3023 /* Full host register reinit: */
3024 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3025 {
3026 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3027 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3028 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3029 }
3030
3031 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3032 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3033#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3034 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3035#endif
3036#ifdef IEMNATIVE_REG_FIXED_TMP0
3037 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3038#endif
3039#ifdef IEMNATIVE_REG_FIXED_TMP1
3040 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3041#endif
3042#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3043 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3044#endif
3045 );
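    /* Mark the fixed registers that aren't given a dedicated role below as plain reserved. */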
3046 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3047 {
3048 fRegs &= ~RT_BIT_32(idxReg);
3049 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3050 }
3051
3052 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3053#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3054 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3055#endif
3056#ifdef IEMNATIVE_REG_FIXED_TMP0
3057 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3058#endif
3059#ifdef IEMNATIVE_REG_FIXED_TMP1
3060 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3061#endif
3062#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3063 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3064#endif
3065
3066#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3067# ifdef RT_ARCH_ARM64
3068 /*
3069 * Arm64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically
3070 * pair two real registers to one virtual register for now, leaving us with only 16 256-bit registers.
3071 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register of each pair as fixed
3072 * here during init, and the register allocator assumes it will always be free when the lower one is picked.
3073 */
3074 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
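    /* Every odd-numbered bit set, i.e. the upper (odd) register of each v(2n)/v(2n+1) pair. */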
3075# else
3076 uint32_t const fFixedAdditional = 0;
3077# endif
3078
3079 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3080 | fFixedAdditional
3081# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3082 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3083# endif
3084 ;
3085 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3086 pReNative->Core.bmGstSimdRegShadows = 0;
3087 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3088 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3089
3090 /* Full host SIMD register reinit: */
3091 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3092 {
3093 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3094 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3095 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3096 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3097 }
3098
3099 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3100 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3101 {
3102 fRegs &= ~RT_BIT_32(idxReg);
3103 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3104 }
3105
3106#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3107 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3108#endif
3109
3110#endif
3111
3112 return pReNative;
3113}
3114
3115
3116/**
3117 * Allocates and initializes the native recompiler state.
3118 *
3119 * This is called the first time an EMT wants to recompile something.
3120 *
3121 * @returns Pointer to the new recompiler state.
3122 * @param pVCpu The cross context virtual CPU structure of the calling
3123 * thread.
3124 * @param pTb The TB that's about to be recompiled.
3125 * @thread EMT(pVCpu)
3126 */
3127static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3128{
3129 VMCPU_ASSERT_EMT(pVCpu);
3130
3131 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3132 AssertReturn(pReNative, NULL);
3133
3134 /*
3135     * Try to allocate all the buffers and stuff we need.
3136 */
3137 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3138 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3139 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3140#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3141 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3142#endif
3143 if (RT_LIKELY( pReNative->pInstrBuf
3144 && pReNative->paLabels
3145 && pReNative->paFixups)
3146#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3147 && pReNative->pDbgInfo
3148#endif
3149 )
3150 {
3151 /*
3152 * Set the buffer & array sizes on success.
3153 */
3154 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3155 pReNative->cLabelsAlloc = _8K;
3156 pReNative->cFixupsAlloc = _16K;
3157#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3158 pReNative->cDbgInfoAlloc = _16K;
3159#endif
3160
3161 /* Other constant stuff: */
3162 pReNative->pVCpu = pVCpu;
3163
3164 /*
3165 * Done, just need to save it and reinit it.
3166 */
3167 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3168 return iemNativeReInit(pReNative, pTb);
3169 }
3170
3171 /*
3172 * Failed. Cleanup and return.
3173 */
3174 AssertFailed();
3175 RTMemFree(pReNative->pInstrBuf);
3176 RTMemFree(pReNative->paLabels);
3177 RTMemFree(pReNative->paFixups);
3178#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3179 RTMemFree(pReNative->pDbgInfo);
3180#endif
3181 RTMemFree(pReNative);
3182 return NULL;
3183}
3184
3185
3186/**
3187 * Creates a label
3188 *
3189 * If the label does not yet have a defined position,
3190 * call iemNativeLabelDefine() later to set it.
3191 *
3192 * @returns Label ID. Throws VBox status code on failure, so no need to check
3193 * the return value.
3194 * @param pReNative The native recompile state.
3195 * @param enmType The label type.
3196 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3197 * label is not yet defined (default).
3198 * @param   uData       Data associated with the label.  Only applicable to
3199 *                      certain types of labels.  Default is zero.
3200 */
3201DECL_HIDDEN_THROW(uint32_t)
3202iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3203 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3204{
3205 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3206
3207 /*
3208 * Locate existing label definition.
3209 *
3210 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3211 * and uData is zero.
3212 */
3213 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3214 uint32_t const cLabels = pReNative->cLabels;
3215 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3216#ifndef VBOX_STRICT
3217 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3218 && offWhere == UINT32_MAX
3219 && uData == 0
3220#endif
3221 )
3222 {
3223#ifndef VBOX_STRICT
3224 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3225 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3226 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3227 if (idxLabel < pReNative->cLabels)
3228 return idxLabel;
3229#else
3230 for (uint32_t i = 0; i < cLabels; i++)
3231 if ( paLabels[i].enmType == enmType
3232 && paLabels[i].uData == uData)
3233 {
3234 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3235 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3236 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3237 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3238 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3239 return i;
3240 }
3241 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3242 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3243#endif
3244 }
3245
3246 /*
3247 * Make sure we've got room for another label.
3248 */
3249 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3250 { /* likely */ }
3251 else
3252 {
3253 uint32_t cNew = pReNative->cLabelsAlloc;
3254 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3255 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3256 cNew *= 2;
3257 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
3258 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3259 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3260 pReNative->paLabels = paLabels;
3261 pReNative->cLabelsAlloc = cNew;
3262 }
3263
3264 /*
3265 * Define a new label.
3266 */
3267 paLabels[cLabels].off = offWhere;
3268 paLabels[cLabels].enmType = enmType;
3269 paLabels[cLabels].uData = uData;
3270 pReNative->cLabels = cLabels + 1;
3271
3272 Assert((unsigned)enmType < 64);
3273 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3274
3275 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3276 {
3277 Assert(uData == 0);
3278 pReNative->aidxUniqueLabels[enmType] = cLabels;
3279 }
3280
3281 if (offWhere != UINT32_MAX)
3282 {
3283#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3284 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3285 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3286#endif
3287 }
3288 return cLabels;
3289}
3290
3291
3292/**
3293 * Defines the location of an existing label.
3294 *
3295 * @param pReNative The native recompile state.
3296 * @param idxLabel The label to define.
3297 * @param offWhere The position.
3298 */
3299DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3300{
3301 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3302 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3303 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3304 pLabel->off = offWhere;
3305#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3306 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3307 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3308#endif
3309}
3310
3311
3312/**
3313 * Looks up a label.
3314 *
3315 * @returns Label ID if found, UINT32_MAX if not.
3316 */
3317static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3318 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3319{
3320 Assert((unsigned)enmType < 64);
3321 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3322 {
3323 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3324 return pReNative->aidxUniqueLabels[enmType];
3325
3326 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3327 uint32_t const cLabels = pReNative->cLabels;
3328 for (uint32_t i = 0; i < cLabels; i++)
3329 if ( paLabels[i].enmType == enmType
3330 && paLabels[i].uData == uData
3331 && ( paLabels[i].off == offWhere
3332 || offWhere == UINT32_MAX
3333 || paLabels[i].off == UINT32_MAX))
3334 return i;
3335 }
3336 return UINT32_MAX;
3337}
3338
3339
3340/**
3341 * Adds a fixup.
3342 *
3343 * @throws VBox status code (int) on failure.
3344 * @param pReNative The native recompile state.
3345 * @param offWhere The instruction offset of the fixup location.
3346 * @param idxLabel The target label ID for the fixup.
3347 * @param enmType The fixup type.
3348 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3349 */
3350DECL_HIDDEN_THROW(void)
3351iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3352 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3353{
3354 Assert(idxLabel <= UINT16_MAX);
3355 Assert((unsigned)enmType <= UINT8_MAX);
3356#ifdef RT_ARCH_ARM64
3357 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3358 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3360#endif
3361
3362 /*
3363     * Make sure we've got room.
3364 */
3365 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3366 uint32_t const cFixups = pReNative->cFixups;
3367 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3368 { /* likely */ }
3369 else
3370 {
3371 uint32_t cNew = pReNative->cFixupsAlloc;
3372 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3373 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3374 cNew *= 2;
3375 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3376 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3377 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3378 pReNative->paFixups = paFixups;
3379 pReNative->cFixupsAlloc = cNew;
3380 }
3381
3382 /*
3383 * Add the fixup.
3384 */
3385 paFixups[cFixups].off = offWhere;
3386 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3387 paFixups[cFixups].enmType = enmType;
3388 paFixups[cFixups].offAddend = offAddend;
3389 pReNative->cFixups = cFixups + 1;
3390}
3391
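/*
 * Putting the label and fixup helpers above together - an illustrative sketch
 * only.  The label type, fixup type and the branch being emitted are
 * placeholders (not taken from this file); the three helpers are the ones
 * defined above.
 *
 *      // Forward declare the branch target; its position is not known yet.
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *
 *      // ... emit the branch instruction at 'off' and record a fixup so it
 *      //     can be patched once the label position is known ...
 *      iemNativeAddFixup(pReNative, off, idxLabel, enmFixupType);
 *
 *      // ... emit the code leading up to the branch target ...
 *
 *      // Resolve the forward declaration now that 'off' is the target.
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */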
3392
3393/**
3394 * Slow code path for iemNativeInstrBufEnsure.
3395 */
3396DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3397{
3398 /* Double the buffer size till we meet the request. */
3399 uint32_t cNew = pReNative->cInstrBufAlloc;
3400 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3401 do
3402 cNew *= 2;
3403 while (cNew < off + cInstrReq);
3404
3405 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3406#ifdef RT_ARCH_ARM64
3407 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3408#else
3409 uint32_t const cbMaxInstrBuf = _2M;
3410#endif
3411 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3412
3413 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3414 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3415
3416#ifdef VBOX_STRICT
3417 pReNative->offInstrBufChecked = off + cInstrReq;
3418#endif
3419 pReNative->cInstrBufAlloc = cNew;
3420 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3421}
3422
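/*
 * Note on usage (illustrative sketch): emitters call the inline fast path,
 * iemNativeInstrBufEnsure(), to guarantee room for a small number of
 * instruction units before writing them; only when the buffer is too small
 * does it end up in the slow path above.  Assuming the fast path mirrors the
 * slow path's parameters, a typical emitter fragment looks like:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
 *      pCodeBuf[off++] = ...;      // first instruction unit
 *      pCodeBuf[off++] = ...;      // second instruction unit
 *      return off;
 */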
3423#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3424
3425/**
3426 * Grows the static debug info array used during recompilation.
3427 *
3428 * @returns Pointer to the new debug info block; throws VBox status code on
3429 * failure, so no need to check the return value.
3430 */
3431DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3432{
3433 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3434 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3435 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3436 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3437 pReNative->pDbgInfo = pDbgInfo;
3438 pReNative->cDbgInfoAlloc = cNew;
3439 return pDbgInfo;
3440}
3441
3442
3443/**
3444 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3445 */
3446DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3447{
3448 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3449 { /* likely */ }
3450 else
3451 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3452 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3453}
3454
3455
3456/**
3457 * Debug Info: Adds a native offset record, if necessary.
3458 */
3459DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3460{
3461 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3462
3463 /*
3464 * Search backwards to see if we've got a similar record already.
3465 */
3466 uint32_t idx = pDbgInfo->cEntries;
3467 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3468 while (idx-- > idxStop)
3469 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3470 {
3471 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3472 return;
3473 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3474 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3475 break;
3476 }
3477
3478 /*
3479 * Add it.
3480 */
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3482 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3483 pEntry->NativeOffset.offNative = off;
3484}
3485
3486
3487/**
3488 * Debug Info: Record info about a label.
3489 */
3490static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3491{
3492 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3493 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3494 pEntry->Label.uUnused = 0;
3495 pEntry->Label.enmLabel = (uint8_t)enmType;
3496 pEntry->Label.uData = uData;
3497}
3498
3499
3500/**
3501 * Debug Info: Record info about a threaded call.
3502 */
3503static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3504{
3505 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3506 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3507 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3508 pEntry->ThreadedCall.uUnused = 0;
3509 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3510}
3511
3512
3513/**
3514 * Debug Info: Record info about a new guest instruction.
3515 */
3516static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3517{
3518 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3519 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3520 pEntry->GuestInstruction.uUnused = 0;
3521 pEntry->GuestInstruction.fExec = fExec;
3522}
3523
3524
3525/**
3526 * Debug Info: Record info about guest register shadowing.
3527 */
3528DECL_HIDDEN_THROW(void)
3529iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3530 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3531{
3532 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3533 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3534 pEntry->GuestRegShadowing.uUnused = 0;
3535 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3536 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3537 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3538}
3539
3540
3541# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3542/**
3543 * Debug Info: Record info about guest SIMD register shadowing.
3544 */
3545DECL_HIDDEN_THROW(void)
3546iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3547 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3548{
3549 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3550 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3551 pEntry->GuestSimdRegShadowing.uUnused = 0;
3552 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3553 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3554 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3555}
3556# endif
3557
3558
3559# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3560/**
3561 * Debug Info: Record info about delayed RIP updates.
3562 */
3563DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3564{
3565 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3566 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3567 pEntry->DelayedPcUpdate.offPc = offPc;
3568 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3569}
3570# endif
3571
3572#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3573
3574
3575/*********************************************************************************************************************************
3576* Register Allocator *
3577*********************************************************************************************************************************/
3578
3579/**
3580 * Register parameter indexes (indexed by argument number).
3581 */
3582DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3583{
3584 IEMNATIVE_CALL_ARG0_GREG,
3585 IEMNATIVE_CALL_ARG1_GREG,
3586 IEMNATIVE_CALL_ARG2_GREG,
3587 IEMNATIVE_CALL_ARG3_GREG,
3588#if defined(IEMNATIVE_CALL_ARG4_GREG)
3589 IEMNATIVE_CALL_ARG4_GREG,
3590# if defined(IEMNATIVE_CALL_ARG5_GREG)
3591 IEMNATIVE_CALL_ARG5_GREG,
3592# if defined(IEMNATIVE_CALL_ARG6_GREG)
3593 IEMNATIVE_CALL_ARG6_GREG,
3594# if defined(IEMNATIVE_CALL_ARG7_GREG)
3595 IEMNATIVE_CALL_ARG7_GREG,
3596# endif
3597# endif
3598# endif
3599#endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3602
3603/**
3604 * Call register masks indexed by argument count.
3605 */
3606DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3607{
3608 0,
3609 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3610 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3611 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3612 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3613 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3614#if defined(IEMNATIVE_CALL_ARG4_GREG)
3615 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3616 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3617# if defined(IEMNATIVE_CALL_ARG5_GREG)
3618 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3619 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3620# if defined(IEMNATIVE_CALL_ARG6_GREG)
3621 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3622 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3623 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3624# if defined(IEMNATIVE_CALL_ARG7_GREG)
3625 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3626 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3627 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3628# endif
3629# endif
3630# endif
3631#endif
3632};
3633
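/*
 * How the two tables above relate (illustrative sketch only):
 * g_aidxIemNativeCallRegs[i] names the host register carrying argument i,
 * while g_afIemNativeCallRegs[cArgs] is the union of the first cArgs of them.
 *
 *      uint8_t const idxArgReg = g_aidxIemNativeCallRegs[iArg];
 *      Assert(g_afIemNativeCallRegs[iArg + 1] & RT_BIT_32(idxArgReg));
 */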
3634#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3635/**
3636 * BP offset of the stack argument slots.
3637 *
3638 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3639 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3640 */
3641DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3642{
3643 IEMNATIVE_FP_OFF_STACK_ARG0,
3644# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3645 IEMNATIVE_FP_OFF_STACK_ARG1,
3646# endif
3647# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3648 IEMNATIVE_FP_OFF_STACK_ARG2,
3649# endif
3650# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3651 IEMNATIVE_FP_OFF_STACK_ARG3,
3652# endif
3653};
3654AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3655#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3656
3657/**
3658 * Info about shadowed guest register values.
3659 * @see IEMNATIVEGSTREG
3660 */
3661DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3662{
3663#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3664 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3665 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3666 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3667 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3668 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3669 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3670 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3671 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3672 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3673 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3674 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3675 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3676 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3677 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3678 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3679 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3680 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3681 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3682 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3683 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3684 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3685 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3686 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3687 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3688 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3689 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3690 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3691 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3692 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3693 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3694 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3695 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3696 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3697 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3698 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3699 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3700 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3701 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3702 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3703 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3704 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3705 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3706 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3707 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3708 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3709 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3710 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3711 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3712#undef CPUMCTX_OFF_AND_SIZE
3713};
3714AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3715
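/*
 * Consumption sketch (illustrative only): the table is indexed by
 * IEMNATIVEGSTREG and supplies the CPUMCTX location of the shadowed field,
 * its size and a name used for logging.  Only the 'cb' and 'pszName' members
 * are referenced in this file; any other member name is an assumption about
 * the header layout.
 *
 *      Assert(g_aGstShadowInfo[enmGstReg].cb != 0);            // register is backed by a CPUMCTX field
 *      Log12(("%s\n", g_aGstShadowInfo[enmGstReg].pszName));   // human readable name for logging
 */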
3716
3717/** Host CPU general purpose register names. */
3718DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3719{
3720#ifdef RT_ARCH_AMD64
3721 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3722#elif defined(RT_ARCH_ARM64)
3723 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3724 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3725#else
3726# error "port me"
3727#endif
3728};
3729
3730
3731#if 0 /* unused */
3732/**
3733 * Tries to locate a suitable register in the given register mask.
3734 *
3735 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3736 * failed.
3737 *
3738 * @returns Host register number on success, returns UINT8_MAX on failure.
3739 */
3740static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3741{
3742 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3743 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3744 if (fRegs)
3745 {
3746 /** @todo pick better here: */
3747 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3748
3749 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3750 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3751 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3752 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3753
3754 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3755 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3756 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3757 return idxReg;
3758 }
3759 return UINT8_MAX;
3760}
3761#endif /* unused */
3762
3763
3764/**
3765 * Locate a register, possibly freeing one up.
3766 *
3767 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3768 * failed.
3769 *
3770 * @returns Host register number on success.  Returns UINT8_MAX if no registers
3771 *          were found; the caller is supposed to deal with this and raise an
3772 *          allocation type specific status code (if desired).
3773 *
3774 * @throws  VBox status code if we run into trouble spilling a variable or
3775 *          recording debug info.  Does NOT throw anything if we're out of
3776 *          registers, though.
3777 */
3778static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3779 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3780{
3781 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3782 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3783 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3784
3785 /*
3786 * Try a freed register that's shadowing a guest register.
3787 */
3788 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3789 if (fRegs)
3790 {
3791 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3792
3793#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3794 /*
3795         * When we have liveness information, we use it to kick out all shadowed
3796         * guest registers that will not be needed any more in this TB.  If we're
3797 * lucky, this may prevent us from ending up here again.
3798 *
3799 * Note! We must consider the previous entry here so we don't free
3800 * anything that the current threaded function requires (current
3801 * entry is produced by the next threaded function).
3802 */
3803 uint32_t const idxCurCall = pReNative->idxCurCall;
3804 if (idxCurCall > 0)
3805 {
3806 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3807
3808# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3809 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3810 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3811 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3812# else
3813 /* Construct a mask of the registers not in the read or write state.
3814               Note! We could skip writes, if they aren't from us, as this is just
3815 a hack to prevent trashing registers that have just been written
3816 or will be written when we retire the current instruction. */
3817 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3818 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3819 & IEMLIVENESSBIT_MASK;
3820# endif
3821 /* Merge EFLAGS. */
3822 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3823 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3824 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3825 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3826 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3827
3828 /* If it matches any shadowed registers. */
3829 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3830 {
3831 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3832 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3833 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3834
3835 /* See if we've got any unshadowed registers we can return now. */
3836 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3837 if (fUnshadowedRegs)
3838 {
3839 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3840 return (fPreferVolatile
3841 ? ASMBitFirstSetU32(fUnshadowedRegs)
3842 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3843 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3844 - 1;
3845 }
3846 }
3847 }
3848#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3849
3850 unsigned const idxReg = (fPreferVolatile
3851 ? ASMBitFirstSetU32(fRegs)
3852 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3853 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3854 - 1;
3855
3856 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3857 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3858 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3859 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3860
3861 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3862 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3863 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3864 return idxReg;
3865 }
3866
3867 /*
3868 * Try free up a variable that's in a register.
3869 *
3870 * We do two rounds here, first evacuating variables we don't need to be
3871 * saved on the stack, then in the second round move things to the stack.
3872 */
3873 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3874 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3875 {
3876 uint32_t fVars = pReNative->Core.bmVars;
3877 while (fVars)
3878 {
3879 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3880 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3881 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3882 && (RT_BIT_32(idxReg) & fRegMask)
3883 && ( iLoop == 0
3884 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3885 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3886 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3887 {
3888 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3889 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3890 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3891 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3892 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3893 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3894
3895 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3896 {
3897 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3898 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3899 }
3900
3901 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3902 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3903
3904 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3905 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3906 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3907 return idxReg;
3908 }
3909 fVars &= ~RT_BIT_32(idxVar);
3910 }
3911 }
3912
3913 return UINT8_MAX;
3914}
3915
3916
3917/**
3918 * Reassigns a variable to a different register specified by the caller.
3919 *
3920 * @returns The new code buffer position.
3921 * @param pReNative The native recompile state.
3922 * @param off The current code buffer position.
3923 * @param idxVar The variable index.
3924 * @param idxRegOld The old host register number.
3925 * @param idxRegNew The new host register number.
3926 * @param pszCaller The caller for logging.
3927 */
3928static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3929 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3930{
3931 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3932 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3933#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3934 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3935#endif
3936 RT_NOREF(pszCaller);
3937
3938 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3939
3940 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3941 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3942 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3943 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3944
3945 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3946 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3947 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3948 if (fGstRegShadows)
3949 {
3950 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3951 | RT_BIT_32(idxRegNew);
3952 while (fGstRegShadows)
3953 {
3954 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3955 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3956
3957 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3958 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3959 }
3960 }
3961
3962 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3963 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3964 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3965 return off;
3966}
3967
3968
3969/**
3970 * Moves a variable to a different register or spills it onto the stack.
3971 *
3972 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3973 * kinds can easily be recreated if needed later.
3974 *
3975 * @returns The new code buffer position.
3976 * @param pReNative The native recompile state.
3977 * @param off The current code buffer position.
3978 * @param idxVar The variable index.
3979 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3980 * call-volatile registers.
3981 */
3982DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3983 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3984{
3985 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3986 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3987 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3988 Assert(!pVar->fRegAcquired);
3989
3990 uint8_t const idxRegOld = pVar->idxReg;
3991 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3992 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3993 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3994 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3995 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3996 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3997 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3998 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3999
4000
4001 /** @todo Add statistics on this.*/
4002 /** @todo Implement basic variable liveness analysis (python) so variables
4003     *        can be freed immediately once no longer used.  Otherwise we risk
4004     *        trashing registers and stack slots for dead variables.
4005 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4006
4007 /*
4008 * First try move it to a different register, as that's cheaper.
4009 */
4010 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4011 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4012 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4013 if (fRegs)
4014 {
4015 /* Avoid using shadow registers, if possible. */
4016 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4017 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4018 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4019 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4020 }
4021
4022 /*
4023 * Otherwise we must spill the register onto the stack.
4024 */
4025 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4026 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4027 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4028 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4029
4030 pVar->idxReg = UINT8_MAX;
4031 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4032 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4033 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4034 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4035 return off;
4036}
4037
4038
4039/**
4040 * Allocates a temporary host general purpose register.
4041 *
4042 * This may emit code to save register content onto the stack in order to free
4043 * up a register.
4044 *
4045 * @returns The host register number; throws VBox status code on failure,
4046 * so no need to check the return value.
4047 * @param pReNative The native recompile state.
4048 * @param poff Pointer to the variable with the code buffer position.
4049 * This will be update if we need to move a variable from
4050 * register to stack in order to satisfy the request.
4051 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4052 * registers (@c true, default) or the other way around
4053 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4054 */
4055DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4056{
4057 /*
4058 * Try find a completely unused register, preferably a call-volatile one.
4059 */
4060 uint8_t idxReg;
4061 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4062 & ~pReNative->Core.bmHstRegsWithGstShadow
4063 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4064 if (fRegs)
4065 {
4066 if (fPreferVolatile)
4067 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4068 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4069 else
4070 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4071 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4072 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4073 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4074 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4075 }
4076 else
4077 {
4078 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4079 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4080 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4081 }
4082 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4083}
4084
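/*
 * Typical call pattern (illustrative sketch): allocate a scratch register,
 * emit code using it and release it again.  The release helper,
 * iemNativeRegFreeTmp(), lives elsewhere in this file and is assumed here.
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      // ... emit instructions clobbering idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */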
4085
4086/**
4087 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4088 * registers.
4089 *
4090 * @returns The host register number; throws VBox status code on failure,
4091 * so no need to check the return value.
4092 * @param pReNative The native recompile state.
4093 * @param poff Pointer to the variable with the code buffer position.
4094 * This will be update if we need to move a variable from
4095 * register to stack in order to satisfy the request.
4096 * @param fRegMask Mask of acceptable registers.
4097 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4098 * registers (@c true, default) or the other way around
4099 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4100 */
4101DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4102 bool fPreferVolatile /*= true*/)
4103{
4104 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4105 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4106
4107 /*
4108 * Try find a completely unused register, preferably a call-volatile one.
4109 */
4110 uint8_t idxReg;
4111 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4112 & ~pReNative->Core.bmHstRegsWithGstShadow
4113 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4114 & fRegMask;
4115 if (fRegs)
4116 {
4117 if (fPreferVolatile)
4118 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4119 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4120 else
4121 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4122 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4123 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4124 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4125 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4126 }
4127 else
4128 {
4129 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4130 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4131 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4132 }
4133 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4134}
4135
4136
4137/**
4138 * Allocates a temporary register for loading an immediate value into.
4139 *
4140 * This will emit code to load the immediate, unless there happens to be an
4141 * unused register with the value already loaded.
4142 *
4143 * The caller will not modify the returned register, it must be considered
4144 * read-only. Free using iemNativeRegFreeTmpImm.
4145 *
4146 * @returns The host register number; throws VBox status code on failure, so no
4147 * need to check the return value.
4148 * @param pReNative The native recompile state.
4149 * @param poff Pointer to the variable with the code buffer position.
4150 * @param uImm The immediate value that the register must hold upon
4151 * return.
4152 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4153 * registers (@c true, default) or the other way around
4154 * (@c false).
4155 *
4156 * @note Reusing immediate values has not been implemented yet.
4157 */
4158DECL_HIDDEN_THROW(uint8_t)
4159iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4160{
4161 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4162 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4163 return idxReg;
4164}
4165
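/*
 * Usage sketch (illustrative only), following the doc comment above: the
 * returned register must be treated as read-only and released again with
 * iemNativeRegFreeTmpImm().
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeef));
 *      // ... use idxRegImm as a read-only source operand ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */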
4166
4167/**
4168 * Allocates a temporary host general purpose register for keeping a guest
4169 * register value.
4170 *
4171 * Since we may already have a register holding the guest register value,
4172 * code will be emitted to do the loading if that's not the case. Code may also
4173 * be emitted if we have to free up a register to satify the request.
4174 *
4175 * @returns The host register number; throws VBox status code on failure, so no
4176 * need to check the return value.
4177 * @param pReNative The native recompile state.
4178 * @param poff Pointer to the variable with the code buffer
4179 * position. This will be update if we need to move a
4180 * variable from register to stack in order to satisfy
4181 * the request.
4182 * @param   enmGstReg       The guest register that is to be updated.
4183 * @param enmIntendedUse How the caller will be using the host register.
4184 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4185 * register is okay (default). The ASSUMPTION here is
4186 * that the caller has already flushed all volatile
4187 * registers, so this is only applied if we allocate a
4188 * new register.
4189 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4190 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4191 */
4192DECL_HIDDEN_THROW(uint8_t)
4193iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4194 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4195 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4196{
4197 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4198#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4199 AssertMsg( fSkipLivenessAssert
4200 || pReNative->idxCurCall == 0
4201 || enmGstReg == kIemNativeGstReg_Pc
4202 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4203 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4204 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4205 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4206 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4207 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4208#endif
4209 RT_NOREF(fSkipLivenessAssert);
4210#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4211 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4212#endif
4213 uint32_t const fRegMask = !fNoVolatileRegs
4214 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4215 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4216
4217 /*
4218 * First check if the guest register value is already in a host register.
4219 */
4220 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4221 {
4222 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4223 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4224 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4225 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4226
4227 /* It's not supposed to be allocated... */
4228 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4229 {
4230 /*
4231 * If the register will trash the guest shadow copy, try find a
4232 * completely unused register we can use instead. If that fails,
4233 * we need to disassociate the host reg from the guest reg.
4234 */
4235 /** @todo would be nice to know if preserving the register is in any way helpful. */
4236 /* If the purpose is calculations, try duplicate the register value as
4237 we'll be clobbering the shadow. */
4238 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4239 && ( ~pReNative->Core.bmHstRegs
4240 & ~pReNative->Core.bmHstRegsWithGstShadow
4241 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4242 {
4243 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4244
4245 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4246
4247 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4248 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4249 g_apszIemNativeHstRegNames[idxRegNew]));
4250 idxReg = idxRegNew;
4251 }
4252 /* If the current register matches the restrictions, go ahead and allocate
4253 it for the caller. */
4254 else if (fRegMask & RT_BIT_32(idxReg))
4255 {
4256 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4257 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4258 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4259 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4260 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4261 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4262 else
4263 {
4264 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4265 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4266 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4267 }
4268 }
4269 /* Otherwise, allocate a register that satisfies the caller and transfer
4270 the shadowing if compatible with the intended use. (This basically
4271               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4272 else
4273 {
4274 Assert(fNoVolatileRegs);
4275 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4276 !fNoVolatileRegs
4277 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4278 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4279 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4280 {
4281 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4282                     Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4283 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4284 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4285 }
4286 else
4287 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4288 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4289 g_apszIemNativeHstRegNames[idxRegNew]));
4290 idxReg = idxRegNew;
4291 }
4292 }
4293 else
4294 {
4295 /*
4296 * Oops. Shadowed guest register already allocated!
4297 *
4298 * Allocate a new register, copy the value and, if updating, the
4299 * guest shadow copy assignment to the new register.
4300 */
4301 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4302 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4303 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4304 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4305
4306 /** @todo share register for readonly access. */
4307 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4308 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4309
4310 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4311 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4312
4313 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4314 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4315 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4316 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4317 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4318 else
4319 {
4320 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4321 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4322 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4323 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4324 }
4325 idxReg = idxRegNew;
4326 }
4327 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4328
4329#ifdef VBOX_STRICT
4330 /* Strict builds: Check that the value is correct. */
4331 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4332#endif
4333
4334 return idxReg;
4335 }
4336
4337 /*
4338     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4339 */
4340 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4341
4342 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4343 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4344
4345 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4346 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4347 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4348 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4349
4350 return idxRegNew;
4351}
4352
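/*
 * Usage sketch (illustrative only; the GPR enum arithmetic and the release
 * helper iemNativeRegFreeTmp() are assumptions about code living elsewhere in
 * this file): fetch a guest GPR for updating and release it afterwards.
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                             (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                             kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying the host register idxReg; writing the result
 *      //     back to CPUMCTX is out of scope for this sketch ...
 *      iemNativeRegFreeTmp(pReNative, idxReg);
 */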
4353
4354/**
4355 * Allocates a temporary host general purpose register that already holds the
4356 * given guest register value.
4357 *
4358 * The use case for this function is places where the shadowing state cannot be
4359 * modified due to branching and such.  This will fail if we don't have a
4360 * current shadow copy handy or if it's incompatible. The only code that will
4361 * be emitted here is value checking code in strict builds.
4362 *
4363 * The intended use can only be readonly!
4364 *
4365 * @returns The host register number, UINT8_MAX if not present.
4366 * @param pReNative The native recompile state.
4367 * @param poff Pointer to the instruction buffer offset.
4368 * Will be updated in strict builds if a register is
4369 * found.
4370 * @param   enmGstReg       The guest register that is to be fetched.
4371 * @note In strict builds, this may throw instruction buffer growth failures.
4372 * Non-strict builds will not throw anything.
4373 * @sa iemNativeRegAllocTmpForGuestReg
4374 */
4375DECL_HIDDEN_THROW(uint8_t)
4376iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4377{
4378 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4379#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4380 AssertMsg( pReNative->idxCurCall == 0
4381 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4382 || enmGstReg == kIemNativeGstReg_Pc,
4383 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4384#endif
4385
4386 /*
4387 * First check if the guest register value is already in a host register.
4388 */
4389 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4390 {
4391 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4392 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4393 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4394 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4395
4396 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4397 {
4398 /*
4399 * We only do readonly use here, so easy compared to the other
4400 * variant of this code.
4401 */
4402 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4403 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4404 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4405 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4406 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4407
4408#ifdef VBOX_STRICT
4409 /* Strict builds: Check that the value is correct. */
4410 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4411#else
4412 RT_NOREF(poff);
4413#endif
4414 return idxReg;
4415 }
4416 }
4417
4418 return UINT8_MAX;
4419}
4420
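/*
 * Sketch of how a caller might use iemNativeRegAllocTmpForGuestRegIfAlreadyPresent()
 * on a code path where the shadowing state must not change (e.g. after a branch).
 * The surrounding emitter and its fallback path are hypothetical; only the allocator
 * calls are taken from this file.
 */
#if 0
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... emit code that only reads idxPcReg ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
    {
        /* No compatible shadow copy around; fall back to a path that may emit a load
           or modify the shadowing state. */
    }
#endif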
4421
4422/**
4423 * Allocates argument registers for a function call.
4424 *
4425 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4426 * need to check the return value.
4427 * @param pReNative The native recompile state.
4428 * @param off The current code buffer offset.
4429 * @param cArgs The number of arguments the function call takes.
4430 */
4431DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4432{
4433 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4434 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4435 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4436 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4437
4438 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4439 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4440 else if (cArgs == 0)
4441         return off;
4442
4443 /*
4444     * Are we lucky and all registers are free and not shadowing anything?
4445 */
4446 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4447 for (uint32_t i = 0; i < cArgs; i++)
4448 {
4449 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4450 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4451 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4452 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4453 }
4454 /*
4455 * Okay, not lucky so we have to free up the registers.
4456 */
4457 else
4458 for (uint32_t i = 0; i < cArgs; i++)
4459 {
4460 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4461 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4462 {
4463 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4464 {
4465 case kIemNativeWhat_Var:
4466 {
4467 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4468 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4469 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4470 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4471 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4472#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4473 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4474#endif
4475
4476 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4477 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4478 else
4479 {
4480 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4481 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4482 }
4483 break;
4484 }
4485
4486 case kIemNativeWhat_Tmp:
4487 case kIemNativeWhat_Arg:
4488 case kIemNativeWhat_rc:
4489 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4490 default:
4491 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4492 }
4493
4494 }
4495 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4496 {
4497 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4498 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4499 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4500 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4501 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4502 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4503 }
4504 else
4505 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4506 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4507 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4508 }
4509 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4510     return off;
4511}
4512
4513
4514DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4515
4516
4517#if 0
4518/**
4519 * Frees a register assignment of any type.
4520 *
4521 * @param pReNative The native recompile state.
4522 * @param idxHstReg The register to free.
4523 *
4524 * @note Does not update variables.
4525 */
4526DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4527{
4528 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4529 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4530 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4531 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4532 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4533 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4534 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4535 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4536 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4537 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4538 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4539 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4540 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4541 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4542
4543 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4544 /* no flushing, right:
4545 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4546 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4547 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4548 */
4549}
4550#endif
4551
4552
4553/**
4554 * Frees a temporary register.
4555 *
4556 * Any shadow copies of guest registers assigned to the host register will not
4557 * be flushed by this operation.
4558 */
4559DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4560{
4561 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4562 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4563 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4564 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4565 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4566}
4567
4568
4569/**
4570 * Frees a temporary immediate register.
4571 *
4572 * It is assumed that the call has not modified the register, so it still holds
4573 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4574 */
4575DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4576{
4577 iemNativeRegFreeTmp(pReNative, idxHstReg);
4578}
4579
4580
4581/**
4582 * Frees a register assigned to a variable.
4583 *
4584 * The register will be disassociated from the variable.
4585 */
4586DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4587{
4588 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4589 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4590 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4591 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4592 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4593#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4594 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4595#endif
4596
4597 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4598 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4599 if (!fFlushShadows)
4600 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4601 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4602 else
4603 {
4604 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4605 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4606 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4607 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4608 uint64_t fGstRegShadows = fGstRegShadowsOld;
4609 while (fGstRegShadows)
4610 {
4611 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4612 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4613
4614 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4615 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4616 }
4617 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4618 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4619 }
4620}
4621
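/*
 * Sketch of the shadow-bitmap iteration idiom used above and in the flush routines
 * further down: walk a mask of guest registers, lowest set bit first.  The fMask
 * variable and the processing step are placeholders.
 */
#if 0
    uint64_t fMask = pReNative->Core.bmGstRegShadows;
    while (fMask)
    {
        unsigned const idxGstReg = ASMBitFirstSetU64(fMask) - 1; /* 1-based result -> 0-based index */
        fMask &= ~RT_BIT_64(idxGstReg);
        /* ... process pReNative->Core.aidxGstRegShadows[idxGstReg] ... */
    }
#endif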
4622
4623#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4624# ifdef LOG_ENABLED
4625/** Host CPU SIMD register names. */
4626DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4627{
4628# ifdef RT_ARCH_AMD64
4629 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4630# elif defined(RT_ARCH_ARM64)
4631 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4632 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4633# else
4634# error "port me"
4635# endif
4636};
4637# endif
4638
4639
4640/**
4641 * Frees a SIMD register assigned to a variable.
4642 *
4643 * The register will be disassociated from the variable.
4644 */
4645DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4646{
4647 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4648 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4649 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4650 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4651 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4652 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4653
4654 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4655 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4656 if (!fFlushShadows)
4657 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4658 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4659 else
4660 {
4661 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4662 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4663 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4664 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4665 uint64_t fGstRegShadows = fGstRegShadowsOld;
4666 while (fGstRegShadows)
4667 {
4668 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4669 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4670
4671 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4672 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4673 }
4674 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4675 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4676 }
4677}
4678#endif
4679
4680
4681/**
4682 * Called right before emitting a call instruction to move anything important
4683 * out of call-volatile registers, free and flush the call-volatile registers,
4684 * optionally freeing argument variables.
4685 *
4686 * @returns New code buffer offset, UINT32_MAX on failure.
4687 * @param pReNative The native recompile state.
4688 * @param off The code buffer offset.
4689 * @param cArgs The number of arguments the function call takes.
4690 *                      It is presumed that the host registers for these have
4691 *                      already been allocated as such and won't need moving,
4692 *                      just freeing.
4693 * @param fKeepVars Mask of variables that should keep their register
4694 * assignments. Caller must take care to handle these.
4695 */
4696DECL_HIDDEN_THROW(uint32_t)
4697iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4698{
4699 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4700
4701 /* fKeepVars will reduce this mask. */
4702 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4703
4704 /*
4705 * Move anything important out of volatile registers.
4706 */
4707 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4708 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4709 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4710#ifdef IEMNATIVE_REG_FIXED_TMP0
4711 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4712#endif
4713#ifdef IEMNATIVE_REG_FIXED_TMP1
4714 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4715#endif
4716#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4717 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4718#endif
4719 & ~g_afIemNativeCallRegs[cArgs];
4720
4721 fRegsToMove &= pReNative->Core.bmHstRegs;
4722 if (!fRegsToMove)
4723 { /* likely */ }
4724 else
4725 {
4726 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4727 while (fRegsToMove != 0)
4728 {
4729 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4730 fRegsToMove &= ~RT_BIT_32(idxReg);
4731
4732 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4733 {
4734 case kIemNativeWhat_Var:
4735 {
4736 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4737 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4738 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4739 Assert(pVar->idxReg == idxReg);
4740 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4741 {
4742 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4743 idxVar, pVar->enmKind, pVar->idxReg));
4744 if (pVar->enmKind != kIemNativeVarKind_Stack)
4745 pVar->idxReg = UINT8_MAX;
4746 else
4747 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4748 }
4749 else
4750 fRegsToFree &= ~RT_BIT_32(idxReg);
4751 continue;
4752 }
4753
4754 case kIemNativeWhat_Arg:
4755 AssertMsgFailed(("What?!?: %u\n", idxReg));
4756 continue;
4757
4758 case kIemNativeWhat_rc:
4759 case kIemNativeWhat_Tmp:
4760 AssertMsgFailed(("Missing free: %u\n", idxReg));
4761 continue;
4762
4763 case kIemNativeWhat_FixedTmp:
4764 case kIemNativeWhat_pVCpuFixed:
4765 case kIemNativeWhat_pCtxFixed:
4766 case kIemNativeWhat_PcShadow:
4767 case kIemNativeWhat_FixedReserved:
4768 case kIemNativeWhat_Invalid:
4769 case kIemNativeWhat_End:
4770 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4771 }
4772 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4773 }
4774 }
4775
4776 /*
4777 * Do the actual freeing.
4778 */
4779 if (pReNative->Core.bmHstRegs & fRegsToFree)
4780 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4781 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4782 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4783
4784 /* If there are guest register shadows in any call-volatile register, we
4785        have to clear the corresponding guest register masks for each register. */
4786 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4787 if (fHstRegsWithGstShadow)
4788 {
4789 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4790 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4791 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4792 do
4793 {
4794 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4795 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4796
4797 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4798 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4799 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4800 } while (fHstRegsWithGstShadow != 0);
4801 }
4802
4803 return off;
4804}
4805
4806
4807/**
4808 * Flushes a set of guest register shadow copies.
4809 *
4810 * This is usually done after calling a threaded function or a C-implementation
4811 * of an instruction.
4812 *
4813 * @param pReNative The native recompile state.
4814 * @param fGstRegs Set of guest registers to flush.
4815 */
4816DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4817{
4818 /*
4819 * Reduce the mask by what's currently shadowed
4820 */
4821 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4822 fGstRegs &= bmGstRegShadowsOld;
4823 if (fGstRegs)
4824 {
4825 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4826 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4827 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4828 if (bmGstRegShadowsNew)
4829 {
4830 /*
4831 * Partial.
4832 */
4833 do
4834 {
4835 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4836 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4837 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4838 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4839 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4840
4841 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4842 fGstRegs &= ~fInThisHstReg;
4843 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4844 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4845 if (!fGstRegShadowsNew)
4846 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4847 } while (fGstRegs != 0);
4848 }
4849 else
4850 {
4851 /*
4852 * Clear all.
4853 */
4854 do
4855 {
4856 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4857 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4858 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4859 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4860 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4861
4862 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4863 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4864 } while (fGstRegs != 0);
4865 pReNative->Core.bmHstRegsWithGstShadow = 0;
4866 }
4867 }
4868}
4869
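/*
 * Sketch: dropping all guest register shadows after a call that may have modified the
 * entire guest context (a C instruction implementation, say).  The UINT64_MAX mask is
 * reduced internally to what is actually shadowed.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
#endif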
4870
4871/**
4872 * Flushes guest register shadow copies held by a set of host registers.
4873 *
4874 * This is used with the TLB lookup code for ensuring that we don't carry on
4875 * with any guest shadows in volatile registers, as these will get corrupted by
4876 * a TLB miss.
4877 *
4878 * @param pReNative The native recompile state.
4879 * @param fHstRegs Set of host registers to flush guest shadows for.
4880 */
4881DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4882{
4883 /*
4884 * Reduce the mask by what's currently shadowed.
4885 */
4886 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4887 fHstRegs &= bmHstRegsWithGstShadowOld;
4888 if (fHstRegs)
4889 {
4890 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4891 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4892 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4893 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4894 if (bmHstRegsWithGstShadowNew)
4895 {
4896 /*
4897 * Partial (likely).
4898 */
4899 uint64_t fGstShadows = 0;
4900 do
4901 {
4902 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4903 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4904 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4905 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4906
4907 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4908 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4909 fHstRegs &= ~RT_BIT_32(idxHstReg);
4910 } while (fHstRegs != 0);
4911 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4912 }
4913 else
4914 {
4915 /*
4916 * Clear all.
4917 */
4918 do
4919 {
4920 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4921 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4922 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4923 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4924
4925 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4926 fHstRegs &= ~RT_BIT_32(idxHstReg);
4927 } while (fHstRegs != 0);
4928 pReNative->Core.bmGstRegShadows = 0;
4929 }
4930 }
4931}
4932
4933
4934/**
4935 * Restores guest shadow copies in volatile registers.
4936 *
4937 * This is used after calling a helper function (think TLB miss) to restore the
4938 * register state of volatile registers.
4939 *
4940 * @param pReNative The native recompile state.
4941 * @param off The code buffer offset.
4942 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4943 * be active (allocated) w/o asserting. Hack.
4944 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4945 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4946 */
4947DECL_HIDDEN_THROW(uint32_t)
4948iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4949{
4950 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4951 if (fHstRegs)
4952 {
4953 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4954 do
4955 {
4956 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4957
4958             /* It's not fatal if a register is active holding a variable that is
4959                shadowing a guest register, ASSUMING all pending guest register
4960                writes were flushed prior to the helper call. However, we'll be
4961                emitting duplicate restores, so it wastes code space. */
4962 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4963 RT_NOREF(fHstRegsActiveShadows);
4964
4965 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4966 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4967 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4968 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4969
4970 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4971 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4972
4973 fHstRegs &= ~RT_BIT_32(idxHstReg);
4974 } while (fHstRegs != 0);
4975 }
4976 return off;
4977}
4978
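/*
 * Sketch of how the call-related helpers above are typically combined around a helper
 * call; the call-emitting step in the middle is only hinted at and not taken from this
 * file.
 */
#if 0
    /* 1. Move variables out of call-volatile registers and drop their guest shadows. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, 0 /*fKeepVars*/);
    /* 2. ... load arguments and emit the actual call here ... */
    /* 3. Re-establish the guest shadows that lived in volatile registers. */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif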
4979
4980
4981
4982/*********************************************************************************************************************************
4983* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4984*********************************************************************************************************************************/
4985#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4986
4987/**
4988 * Info about shadowed guest SIMD register values.
4989 * @see IEMNATIVEGSTSIMDREG
4990 */
4991static struct
4992{
4993 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4994 uint32_t offXmm;
4995 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4996 uint32_t offYmm;
4997 /** Name (for logging). */
4998 const char *pszName;
4999} const g_aGstSimdShadowInfo[] =
5000{
5001#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5002 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5003 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5004 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5005 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5006 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5007 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5008 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5009 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5010 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5011 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5012 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5013 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5014 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5015 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5016 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5017 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5018 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5019#undef CPUMCTX_OFF_AND_SIZE
5020};
5021AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5022
5023
5024/**
5025 * Frees a temporary SIMD register.
5026 *
5027 * Any shadow copies of guest registers assigned to the host register will not
5028 * be flushed by this operation.
5029 */
5030DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5031{
5032 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5033 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5034 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5035 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5036 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5037}
5038
5039
5040/**
5041 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5042 *
5043 * @returns New code buffer offset.
5044 * @param pReNative The native recompile state.
5045 * @param off Current code buffer position.
5046 * @param enmGstSimdReg The guest SIMD register to flush.
5047 */
5048DECL_HIDDEN_THROW(uint32_t)
5049iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5050{
5051 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5052
5053 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5054 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5055 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5056 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5057
5058 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5059 {
5060 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5061 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5062 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5063 }
5064
5065 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5066 {
5067 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5068 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5069 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5070 }
5071
5072 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5073 return off;
5074}
5075
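/*
 * Sketch: flushing a dirty shadow of guest ymm0 back to the guest context before code
 * that reads CPUMCTX directly.  The register index is a placeholder.
 */
#if 0
    off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
#endif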
5076
5077/**
5078 * Locate a register, possibly freeing one up.
5079 *
5080 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5081 * failed.
5082 *
5083 * @returns Host register number on success. Returns UINT8_MAX if no registers
5084 *          found; the caller is supposed to deal with this and raise an
5085 *          allocation-type specific status code (if desired).
5086 *
5087 * @throws  VBox status code if we run into trouble spilling a variable or
5088 *          recording debug info. Does NOT throw anything if we're out of
5089 * registers, though.
5090 */
5091static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5092 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5093{
5094 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5095 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5096 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5097
5098 /*
5099 * Try a freed register that's shadowing a guest register.
5100 */
5101 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5102 if (fRegs)
5103 {
5104 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5105
5106#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5107 /*
5108         * When we have liveness information, we use it to kick out all shadowed
5109         * guest registers that will not be needed anymore in this TB. If we're
5110 * lucky, this may prevent us from ending up here again.
5111 *
5112 * Note! We must consider the previous entry here so we don't free
5113 * anything that the current threaded function requires (current
5114 * entry is produced by the next threaded function).
5115 */
5116 uint32_t const idxCurCall = pReNative->idxCurCall;
5117 if (idxCurCall > 0)
5118 {
5119 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5120
5121# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5122 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5123 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5124             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
5125# else
5126 /* Construct a mask of the registers not in the read or write state.
5127                Note! We could skip writes, if they aren't from us, as this is just
5128 a hack to prevent trashing registers that have just been written
5129 or will be written when we retire the current instruction. */
5130 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5131 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5132 & IEMLIVENESSBIT_MASK;
5133# endif
5134 /* If it matches any shadowed registers. */
5135 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5136 {
5137 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5138 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5139 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5140
5141 /* See if we've got any unshadowed registers we can return now. */
5142 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5143 if (fUnshadowedRegs)
5144 {
5145 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5146 return (fPreferVolatile
5147 ? ASMBitFirstSetU32(fUnshadowedRegs)
5148 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5149 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5150 - 1;
5151 }
5152 }
5153 }
5154#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5155
5156 unsigned const idxReg = (fPreferVolatile
5157 ? ASMBitFirstSetU32(fRegs)
5158 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5159 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5160 - 1;
5161
5162 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5163 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5164 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5165 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5166
5167 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5168 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5169 uint32_t idxGstSimdReg = 0;
5170 do
5171 {
5172 if (fGstRegShadows & 0x1)
5173 {
5174 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5175 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5176 }
5177 idxGstSimdReg++;
5178 fGstRegShadows >>= 1;
5179 } while (fGstRegShadows);
5180
5181 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5182 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5183 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5184 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5185 return idxReg;
5186 }
5187
5188 /*
5189 * Try free up a variable that's in a register.
5190 *
5191     * We do two rounds here, first evacuating variables that don't need to be
5192     * saved on the stack, then in the second round moving things to the stack.
5193 */
5194 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5195 AssertReleaseFailed(); /** @todo No variable support right now. */
5196#if 0
5197 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5198 {
5199 uint32_t fVars = pReNative->Core.bmSimdVars;
5200 while (fVars)
5201 {
5202 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5203 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5204 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5205 && (RT_BIT_32(idxReg) & fRegMask)
5206 && ( iLoop == 0
5207 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5208 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5209 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5210 {
5211 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5212 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5213 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5214 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5215 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5216 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5217
5218 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5219 {
5220 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5221 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5222 }
5223
5224 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5225 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5226
5227 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5228 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5229 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5230 return idxReg;
5231 }
5232 fVars &= ~RT_BIT_32(idxVar);
5233 }
5234 }
5235#endif
5236
5237 AssertFailed();
5238 return UINT8_MAX;
5239}
5240
5241
5242/**
5243 * Flushes a set of guest SIMD register shadow copies.
5244 *
5245 * This is usually done after calling a threaded function or a C-implementation
5246 * of an instruction.
5247 *
5248 * @param pReNative The native recompile state.
5249 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5250 */
5251DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5252{
5253 /*
5254 * Reduce the mask by what's currently shadowed
5255 */
5256 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5257 fGstSimdRegs &= bmGstSimdRegShadows;
5258 if (fGstSimdRegs)
5259 {
5260 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5261 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5262 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5263 if (bmGstSimdRegShadowsNew)
5264 {
5265 /*
5266 * Partial.
5267 */
5268 do
5269 {
5270 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5271 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5272 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5273 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5274 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5275 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5276
5277 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5278 fGstSimdRegs &= ~fInThisHstReg;
5279 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5280 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5281 if (!fGstRegShadowsNew)
5282 {
5283 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5284 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5285 }
5286 } while (fGstSimdRegs != 0);
5287 }
5288 else
5289 {
5290 /*
5291 * Clear all.
5292 */
5293 do
5294 {
5295 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5296 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5297 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5298 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5299 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5300 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5301
5302 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5303 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5304 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5305 } while (fGstSimdRegs != 0);
5306 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5307 }
5308 }
5309}
5310
5311
5312/**
5313 * Allocates a temporary host SIMD register.
5314 *
5315 * This may emit code to save register content onto the stack in order to free
5316 * up a register.
5317 *
5318 * @returns The host register number; throws VBox status code on failure,
5319 * so no need to check the return value.
5320 * @param pReNative The native recompile state.
5321 * @param poff Pointer to the variable with the code buffer position.
5322 *                      This will be updated if we need to move a variable from
5323 * register to stack in order to satisfy the request.
5324 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5325 * registers (@c true, default) or the other way around
5326 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5327 */
5328DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5329{
5330 /*
5331 * Try find a completely unused register, preferably a call-volatile one.
5332 */
5333 uint8_t idxSimdReg;
5334    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5335                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5336 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5337 if (fRegs)
5338 {
5339 if (fPreferVolatile)
5340 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5341 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5342 else
5343 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5344 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5345 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5346 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5347 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5348 }
5349 else
5350 {
5351 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5352 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5353 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5354 }
5355
5356 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5357 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5358}
5359
5360
5361/**
5362 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5363 * registers.
5364 *
5365 * @returns The host register number; throws VBox status code on failure,
5366 * so no need to check the return value.
5367 * @param pReNative The native recompile state.
5368 * @param poff Pointer to the variable with the code buffer position.
5369 *                      This will be updated if we need to move a variable from
5370 * register to stack in order to satisfy the request.
5371 * @param fRegMask Mask of acceptable registers.
5372 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5373 * registers (@c true, default) or the other way around
5374 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5375 */
5376DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5377 bool fPreferVolatile /*= true*/)
5378{
5379 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5380 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5381
5382 /*
5383 * Try find a completely unused register, preferably a call-volatile one.
5384 */
5385 uint8_t idxSimdReg;
5386 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5387 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5388 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5389 & fRegMask;
5390 if (fRegs)
5391 {
5392 if (fPreferVolatile)
5393 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5394 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5395 else
5396 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5397 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5398 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5399 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5400 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5401 }
5402 else
5403 {
5404 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5405 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5406 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5407 }
5408
5409 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5410 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5411}
5412
5413
5414/**
5415 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5416 *
5417 * @param pReNative The native recompile state.
5418 * @param idxHstSimdReg The host SIMD register to update the state for.
5419 * @param enmLoadSz The load size to set.
5420 */
5421DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5422 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5423{
5424 /* Everything valid already? -> nothing to do. */
5425 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5426 return;
5427
5428 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5429 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5430 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5431 {
5432 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5433 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5434 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5435 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5436 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5437 }
5438}
5439
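#if 0 /* Sketch of how the load-state merging above behaves for a hypothetical host SIMD register 0. */
    iemNativeSimdRegSetValidLoadFlag(pReNative, 0, kIemNativeGstSimdRegLdStSz_Low128);  /* Invalid -> Low128 */
    iemNativeSimdRegSetValidLoadFlag(pReNative, 0, kIemNativeGstSimdRegLdStSz_High128); /* Low128 + High128 -> 256 */
    iemNativeSimdRegSetValidLoadFlag(pReNative, 0, kIemNativeGstSimdRegLdStSz_Low128);  /* Already 256 -> no change */
#endif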
5440
5441static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5442 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5443{
5444 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5445 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5446 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5447 {
5448# ifdef RT_ARCH_ARM64
5449 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5450 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5451# endif
5452
5453 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5454 {
5455 switch (enmLoadSzDst)
5456 {
5457 case kIemNativeGstSimdRegLdStSz_256:
5458 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5459 break;
5460 case kIemNativeGstSimdRegLdStSz_Low128:
5461 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5462 break;
5463 case kIemNativeGstSimdRegLdStSz_High128:
5464 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5465 break;
5466 default:
5467 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5468 }
5469
5470 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5471 }
5472 }
5473 else
5474 {
5475 /* Complicated stuff where the source is currently missing something, later. */
5476 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5477 }
5478
5479 return off;
5480}
5481
5482
5483/**
5484 * Allocates a temporary host SIMD register for keeping a guest
5485 * SIMD register value.
5486 *
5487 * Since we may already have a register holding the guest register value,
5488 * code will be emitted to do the loading if that's not the case. Code may also
5489 * be emitted if we have to free up a register to satisfy the request.
5490 *
5491 * @returns The host register number; throws VBox status code on failure, so no
5492 * need to check the return value.
5493 * @param pReNative The native recompile state.
5494 * @param poff Pointer to the variable with the code buffer
5495 *                      position. This will be updated if we need to move a
5496 * variable from register to stack in order to satisfy
5497 * the request.
5498 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
5499 * @param enmIntendedUse How the caller will be using the host register.
5500 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5501 * register is okay (default). The ASSUMPTION here is
5502 * that the caller has already flushed all volatile
5503 * registers, so this is only applied if we allocate a
5504 * new register.
5505 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5506 */
5507DECL_HIDDEN_THROW(uint8_t)
5508iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5509 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5510 bool fNoVolatileRegs /*= false*/)
5511{
5512 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5513#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5514 AssertMsg( pReNative->idxCurCall == 0
5515 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5516 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5517 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5518 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5519 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5520 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5521#endif
5522#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5523 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5524#endif
5525 uint32_t const fRegMask = !fNoVolatileRegs
5526 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5527 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5528
5529 /*
5530 * First check if the guest register value is already in a host register.
5531 */
5532 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5533 {
5534 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5535 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5536 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5537 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5538
5539 /* It's not supposed to be allocated... */
5540 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5541 {
5542 /*
5543 * If the register will trash the guest shadow copy, try find a
5544 * completely unused register we can use instead. If that fails,
5545 * we need to disassociate the host reg from the guest reg.
5546 */
5547 /** @todo would be nice to know if preserving the register is in any way helpful. */
5548 /* If the purpose is calculations, try duplicate the register value as
5549 we'll be clobbering the shadow. */
5550 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5551 && ( ~pReNative->Core.bmHstSimdRegs
5552 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5553 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5554 {
5555 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5556
5557 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5558
5559 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5560 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5561 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5562 idxSimdReg = idxRegNew;
5563 }
5564 /* If the current register matches the restrictions, go ahead and allocate
5565 it for the caller. */
5566 else if (fRegMask & RT_BIT_32(idxSimdReg))
5567 {
5568 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5569 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5570 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5571 {
5572 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5573 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5574 else
5575 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5576 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5577 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5578 }
5579 else
5580 {
5581 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5582 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5583 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5584 }
5585 }
5586 /* Otherwise, allocate a register that satisfies the caller and transfer
5587 the shadowing if compatible with the intended use. (This basically
5588 means the call wants a non-volatile register (RSP push/pop scenario).) */
5589 else
5590 {
5591 Assert(fNoVolatileRegs);
5592 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5593 !fNoVolatileRegs
5594 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5595 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5596 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5597 {
5598 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5599                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5600 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5601 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5602 }
5603 else
5604 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5605 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5606 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5607 idxSimdReg = idxRegNew;
5608 }
5609 }
5610 else
5611 {
5612 /*
5613 * Oops. Shadowed guest register already allocated!
5614 *
5615 * Allocate a new register, copy the value and, if updating, the
5616 * guest shadow copy assignment to the new register.
5617 */
5618 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5619 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5620 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5621 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5622
5623 /** @todo share register for readonly access. */
5624 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5625 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5626
5627 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5628 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5629 else
5630 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5631
5632 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5633 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5634 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5635 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5636 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5637 else
5638 {
5639 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5640 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5641 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5642 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5643 }
5644 idxSimdReg = idxRegNew;
5645 }
5646 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5647
5648#ifdef VBOX_STRICT
5649 /* Strict builds: Check that the value is correct. */
5650 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5651 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5652#endif
5653
5654 return idxSimdReg;
5655 }
5656
5657 /*
5658 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5659 */
5660 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5661
5662 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5663 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5664 else
5665 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5666
5667 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5668 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5669
5670 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5671 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5672
5673 return idxRegNew;
5674}
5675
5676#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5677
5678
5679
5680/*********************************************************************************************************************************
5681* Code emitters for flushing pending guest register writes and sanity checks *
5682*********************************************************************************************************************************/
5683
5684#ifdef VBOX_STRICT
5685/**
5686 * Does internal register allocator sanity checks.
5687 */
5688DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5689{
5690 /*
5691 * Iterate host registers building a guest shadowing set.
5692 */
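    /* (The shadow set built here must agree with both the per-register
       fGstRegShadows masks and the aidxGstRegShadows reverse map; the second
       loop below verifies the same relationship starting from the guest side.) */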
5693 uint64_t bmGstRegShadows = 0;
5694 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5695 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5696 while (bmHstRegsWithGstShadow)
5697 {
5698 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5699 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5700 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5701
5702 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5703 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5704 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5705 bmGstRegShadows |= fThisGstRegShadows;
5706 while (fThisGstRegShadows)
5707 {
5708 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5709 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5710 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5711 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5712 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5713 }
5714 }
5715 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5716 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5717 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5718
5719 /*
5720 * Now the other way around, checking the guest to host index array.
5721 */
5722 bmHstRegsWithGstShadow = 0;
5723 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5724 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5725 while (bmGstRegShadows)
5726 {
5727 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5728 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5729 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5730
5731 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5732 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5733 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5734 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5735 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5736 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5737 }
5738 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5739 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5740 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5741}
5742#endif /* VBOX_STRICT */
5743
5744
5745/**
5746 * Flushes any delayed guest register writes.
5747 *
5748 * This must be called prior to calling CImpl functions and any helpers that use
5749 * the guest state (like raising exceptions) and such.
5750 *
 5751 * Currently this handles delayed RIP updates (when IEMNATIVE_WITH_DELAYED_PC_UPDATING
 5752 * is defined) and dirty guest SIMD register shadows (IEMNATIVE_WITH_SIMD_REG_ALLOCATOR).
5753 */
5754DECL_HIDDEN_THROW(uint32_t)
5755iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5756{
5757#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5758 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5759 off = iemNativeEmitPcWriteback(pReNative, off);
5760#else
5761 RT_NOREF(pReNative, fGstShwExcept);
5762#endif
5763
5764#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
 5765 /** @todo r=bird: There must be a quicker way to check whether anything needs
 5766 * doing before calling the SIMD function to do the flushing. */
5767 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
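    /* The loop below writes back any dirty guest SIMD shadow copy and, when
       fFlushShadows is set, also drops the host/guest shadowing association. */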
5768 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5769 {
5770 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5771 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5772
5773 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5774 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5775
5776 if ( fFlushShadows
5777 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5778 {
5779 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5780
5781 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5782 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5783 }
5784 }
5785#else
5786 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5787#endif
5788
5789 return off;
5790}
5791
5792
5793#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5794/**
 5795 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5796 */
5797DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5798{
5799 Assert(pReNative->Core.offPc);
5800# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5801 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5802 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5803# endif
5804
5805# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5806 /* Allocate a temporary PC register. */
5807 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5808
5809 /* Perform the addition and store the result. */
5810 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5811 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5812
5813 /* Free but don't flush the PC register. */
5814 iemNativeRegFreeTmp(pReNative, idxPcReg);
5815# else
5816 /* Compare the shadow with the context value, they should match. */
5817 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5818 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5819# endif
5820
5821 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5822 pReNative->Core.offPc = 0;
5823 pReNative->Core.cInstrPcUpdateSkipped = 0;
5824
5825 return off;
5826}
5827#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5828
5829
5830/*********************************************************************************************************************************
5831* Code Emitters (larger snippets) *
5832*********************************************************************************************************************************/
5833
5834/**
5835 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5836 * extending to 64-bit width.
5837 *
5838 * @returns New code buffer offset on success, UINT32_MAX on failure.
 5839 * @param pReNative The native recompile state.
5840 * @param off The current code buffer position.
5841 * @param idxHstReg The host register to load the guest register value into.
5842 * @param enmGstReg The guest register to load.
5843 *
5844 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5845 * that is something the caller needs to do if applicable.
5846 */
5847DECL_HIDDEN_THROW(uint32_t)
5848iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5849{
5850 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5851 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5852
5853 switch (g_aGstShadowInfo[enmGstReg].cb)
5854 {
5855 case sizeof(uint64_t):
5856 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5857 case sizeof(uint32_t):
5858 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5859 case sizeof(uint16_t):
5860 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5861#if 0 /* not present in the table. */
5862 case sizeof(uint8_t):
5863 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5864#endif
5865 default:
5866 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5867 }
5868}
5869
5870
5871#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5872/**
5873 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5874 *
5875 * @returns New code buffer offset on success, UINT32_MAX on failure.
5876 * @param pReNative The recompiler state.
5877 * @param off The current code buffer position.
5878 * @param idxHstSimdReg The host register to load the guest register value into.
5879 * @param enmGstSimdReg The guest register to load.
5880 * @param enmLoadSz The load size of the register.
5881 *
 5882 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5883 * that is something the caller needs to do if applicable.
5884 */
5885DECL_HIDDEN_THROW(uint32_t)
5886iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5887 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5888{
5889 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5890
5891 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5892 switch (enmLoadSz)
5893 {
5894 case kIemNativeGstSimdRegLdStSz_256:
5895 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5896 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5897 case kIemNativeGstSimdRegLdStSz_Low128:
5898 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5899 case kIemNativeGstSimdRegLdStSz_High128:
5900 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5901 default:
5902 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5903 }
5904}
5905#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5906
5907#ifdef VBOX_STRICT
5908
5909/**
5910 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5911 *
5912 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5913 * Trashes EFLAGS on AMD64.
5914 */
5915DECL_HIDDEN_THROW(uint32_t)
5916iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5917{
5918# ifdef RT_ARCH_AMD64
5919 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5920
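    /* Strategy: rotate the upper half into the low 32 bits, test those bits and
       trap with an int3 if any of them are set, then rotate back so the register
       value is left unchanged for the caller. */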
5921 /* rol reg64, 32 */
5922 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5923 pbCodeBuf[off++] = 0xc1;
5924 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5925 pbCodeBuf[off++] = 32;
5926
5927 /* test reg32, ffffffffh */
5928 if (idxReg >= 8)
5929 pbCodeBuf[off++] = X86_OP_REX_B;
5930 pbCodeBuf[off++] = 0xf7;
5931 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5932 pbCodeBuf[off++] = 0xff;
5933 pbCodeBuf[off++] = 0xff;
5934 pbCodeBuf[off++] = 0xff;
5935 pbCodeBuf[off++] = 0xff;
5936
5937 /* je/jz +1 */
5938 pbCodeBuf[off++] = 0x74;
5939 pbCodeBuf[off++] = 0x01;
5940
5941 /* int3 */
5942 pbCodeBuf[off++] = 0xcc;
5943
5944 /* rol reg64, 32 */
5945 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5946 pbCodeBuf[off++] = 0xc1;
5947 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5948 pbCodeBuf[off++] = 32;
5949
5950# elif defined(RT_ARCH_ARM64)
5951 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5952 /* lsr tmp0, reg64, #32 */
5953 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5954 /* cbz tmp0, +1 */
5955 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5956 /* brk #0x1100 */
5957 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5958
5959# else
5960# error "Port me!"
5961# endif
5962 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5963 return off;
5964}
5965
5966
5967/**
5968 * Emitting code that checks that the content of register @a idxReg is the same
5969 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5970 * instruction if that's not the case.
5971 *
5972 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5973 * Trashes EFLAGS on AMD64.
5974 */
5975DECL_HIDDEN_THROW(uint32_t)
5976iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5977{
5978# ifdef RT_ARCH_AMD64
5979 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5980
5981 /* cmp reg, [mem] */
5982 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5983 {
5984 if (idxReg >= 8)
5985 pbCodeBuf[off++] = X86_OP_REX_R;
5986 pbCodeBuf[off++] = 0x38;
5987 }
5988 else
5989 {
5990 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5991 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5992 else
5993 {
5994 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5995 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5996 else
5997 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5998 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5999 if (idxReg >= 8)
6000 pbCodeBuf[off++] = X86_OP_REX_R;
6001 }
6002 pbCodeBuf[off++] = 0x39;
6003 }
6004 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6005
6006 /* je/jz +1 */
6007 pbCodeBuf[off++] = 0x74;
6008 pbCodeBuf[off++] = 0x01;
6009
6010 /* int3 */
6011 pbCodeBuf[off++] = 0xcc;
6012
6013 /* For values smaller than the register size, we must check that the rest
6014 of the register is all zeros. */
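    /* (Note: with REX.W the TEST immediate below is sign-extended from 32 to 64
       bits, so the 0xff upper immediate bytes make the test cover bits 8/16
       through 63 as well.) */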
6015 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6016 {
6017 /* test reg64, imm32 */
6018 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6019 pbCodeBuf[off++] = 0xf7;
6020 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6021 pbCodeBuf[off++] = 0;
6022 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6023 pbCodeBuf[off++] = 0xff;
6024 pbCodeBuf[off++] = 0xff;
6025
6026 /* je/jz +1 */
6027 pbCodeBuf[off++] = 0x74;
6028 pbCodeBuf[off++] = 0x01;
6029
6030 /* int3 */
6031 pbCodeBuf[off++] = 0xcc;
6032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6033 }
6034 else
6035 {
6036 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6037 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
 6038 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6039 }
6040
6041# elif defined(RT_ARCH_ARM64)
6042 /* mov TMP0, [gstreg] */
6043 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6044
6045 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6046 /* sub tmp0, tmp0, idxReg */
6047 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6048 /* cbz tmp0, +1 */
6049 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6050 /* brk #0x1000+enmGstReg */
6051 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6052 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6053
6054# else
6055# error "Port me!"
6056# endif
6057 return off;
6058}
6059
6060
6061# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6062/**
6063 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6064 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6065 * instruction if that's not the case.
6066 *
6067 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6068 * Trashes EFLAGS on AMD64.
6069 */
6070DECL_HIDDEN_THROW(uint32_t)
6071iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6072 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6073{
 6074 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6075 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6076 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6077 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6078 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6079 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6080 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6081 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6082 return off;
6083
6084# ifdef RT_ARCH_AMD64
6085 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
6086
6087 /* movdqa vectmp0, idxSimdReg */
6088 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6089
6090 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6091
6092 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6093 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6094 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
6095 pbCodeBuf[off++] = X86_OP_REX_R;
6096 pbCodeBuf[off++] = 0x0f;
6097 pbCodeBuf[off++] = 0x38;
6098 pbCodeBuf[off++] = 0x29;
6099 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6100
6101 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6102 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6103 pbCodeBuf[off++] = X86_OP_REX_W
6104 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6105 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6106 pbCodeBuf[off++] = 0x0f;
6107 pbCodeBuf[off++] = 0x3a;
6108 pbCodeBuf[off++] = 0x16;
6109 pbCodeBuf[off++] = 0xeb;
6110 pbCodeBuf[off++] = 0x00;
6111
6112 /* cmp tmp0, 0xffffffffffffffff. */
6113 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6114 pbCodeBuf[off++] = 0x83;
6115 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6116 pbCodeBuf[off++] = 0xff;
6117
6118 /* je/jz +1 */
6119 pbCodeBuf[off++] = 0x74;
6120 pbCodeBuf[off++] = 0x01;
6121
6122 /* int3 */
6123 pbCodeBuf[off++] = 0xcc;
6124
6125 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6126 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6127 pbCodeBuf[off++] = X86_OP_REX_W
6128 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6129 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6130 pbCodeBuf[off++] = 0x0f;
6131 pbCodeBuf[off++] = 0x3a;
6132 pbCodeBuf[off++] = 0x16;
6133 pbCodeBuf[off++] = 0xeb;
6134 pbCodeBuf[off++] = 0x01;
6135
6136 /* cmp tmp0, 0xffffffffffffffff. */
6137 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6138 pbCodeBuf[off++] = 0x83;
6139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6140 pbCodeBuf[off++] = 0xff;
6141
6142 /* je/jz +1 */
6143 pbCodeBuf[off++] = 0x74;
6144 pbCodeBuf[off++] = 0x01;
6145
6146 /* int3 */
6147 pbCodeBuf[off++] = 0xcc;
6148
6149# elif defined(RT_ARCH_ARM64)
6150 /* mov vectmp0, [gstreg] */
6151 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6152
6153 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6154 {
6155 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6156 /* eor vectmp0, vectmp0, idxSimdReg */
6157 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
 6158 /* cnt vectmp0, vectmp0 */
6159 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6160 /* umov tmp0, vectmp0.D[0] */
6161 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6162 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6163 /* cbz tmp0, +1 */
6164 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6165 /* brk #0x1000+enmGstReg */
6166 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6167 }
6168
6169 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6170 {
6171 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6172 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6173 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
 6174 /* cnt vectmp0 + 1, vectmp0 + 1 */
6175 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6176 /* umov tmp0, (vectmp0 + 1).D[0] */
6177 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6178 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6179 /* cbz tmp0, +1 */
6180 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6181 /* brk #0x1000+enmGstReg */
6182 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6183 }
6184
6185# else
6186# error "Port me!"
6187# endif
6188
6189 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6190 return off;
6191}
6192# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6193
6194
6195/**
6196 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6197 * important bits.
6198 *
6199 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6200 * Trashes EFLAGS on AMD64.
6201 */
6202DECL_HIDDEN_THROW(uint32_t)
6203iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6204{
6205 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
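    /* Only the bits covered by IEMTB_F_KEY_MASK are compared, i.e. the flags that
       key the TB lookup; other fExec bits may legitimately differ. */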
6206 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6207 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6208 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6209
6210# ifdef RT_ARCH_AMD64
6211 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6212
6213 /* je/jz +1 */
6214 pbCodeBuf[off++] = 0x74;
6215 pbCodeBuf[off++] = 0x01;
6216
6217 /* int3 */
6218 pbCodeBuf[off++] = 0xcc;
6219
6220# elif defined(RT_ARCH_ARM64)
6221 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6222
6223 /* b.eq +1 */
6224 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6225 /* brk #0x2000 */
6226 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6227
6228# else
6229# error "Port me!"
6230# endif
6231 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6232
6233 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6234 return off;
6235}
6236
6237#endif /* VBOX_STRICT */
6238
6239
6240#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6241/**
6242 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6243 */
6244DECL_HIDDEN_THROW(uint32_t)
6245iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6246{
6247 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6248
6249 fEflNeeded &= X86_EFL_STATUS_BITS;
6250 if (fEflNeeded)
6251 {
6252# ifdef RT_ARCH_AMD64
6253 /* test dword [pVCpu + offVCpu], imm32 */
6254 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6255 if (fEflNeeded <= 0xff)
6256 {
6257 pCodeBuf[off++] = 0xf6;
6258 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6259 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6260 }
6261 else
6262 {
6263 pCodeBuf[off++] = 0xf7;
6264 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6265 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6266 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6267 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6268 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6269 }
6270 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6271
6272# else
6273 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6274 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6275 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6276# ifdef RT_ARCH_ARM64
6277 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6278 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6279# else
6280# error "Port me!"
6281# endif
6282 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6283# endif
6284 }
6285 return off;
6286}
6287#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6288
6289
6290/**
 6291 * Emits code for checking the return code of a call and rcPassUp, returning
 6292 * from the code if either is non-zero.
6293 */
6294DECL_HIDDEN_THROW(uint32_t)
6295iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6296{
6297#ifdef RT_ARCH_AMD64
6298 /*
6299 * AMD64: eax = call status code.
6300 */
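    /* (The call status lands in eax and rcPassUp is loaded into edx below; OR-ing
       the two is zero only when the call returned VINF_SUCCESS and there is no
       pending rcPassUp, so a single jnz covers both failure cases.) */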
6301
6302 /* edx = rcPassUp */
6303 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6304# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6305 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6306# endif
6307
6308 /* edx = eax | rcPassUp */
6309 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6310 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6311 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6312 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6313
6314 /* Jump to non-zero status return path. */
6315 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6316
6317 /* done. */
6318
6319#elif RT_ARCH_ARM64
6320 /*
6321 * ARM64: w0 = call status code.
6322 */
6323# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6324 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6325# endif
6326 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6327
6328 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6329
6330 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6331
6332 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6333 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6334 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6335
6336#else
6337# error "port me"
6338#endif
6339 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6340 RT_NOREF_PV(idxInstr);
6341 return off;
6342}
6343
6344
6345/**
6346 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6347 * raising a \#GP(0) if it isn't.
6348 *
6349 * @returns New code buffer offset, UINT32_MAX on failure.
6350 * @param pReNative The native recompile state.
6351 * @param off The code buffer offset.
6352 * @param idxAddrReg The host register with the address to check.
6353 * @param idxInstr The current instruction.
6354 */
6355DECL_HIDDEN_THROW(uint32_t)
6356iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6357{
6358 /*
6359 * Make sure we don't have any outstanding guest register writes as we may
6360 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
6361 */
6362 off = iemNativeRegFlushPendingWrites(pReNative, off);
6363
6364#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6365 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6366#else
6367 RT_NOREF(idxInstr);
6368#endif
6369
6370#ifdef RT_ARCH_AMD64
6371 /*
6372 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6373 * return raisexcpt();
 6374 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6375 */
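    /* Why this works: for a canonical address bits 63:47 are all equal, so the
       high dword is either <= 0x00007fff or >= 0xffff8000. Adding 0x8000 with
       32-bit wrap-around maps both ranges below 0x10000, so the final shift by 16
       yields zero; any non-canonical value leaves bits set and raises #GP(0). */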
6376 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6377
6378 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6379 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6380 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6381 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6382 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6383
6384 iemNativeRegFreeTmp(pReNative, iTmpReg);
6385
6386#elif defined(RT_ARCH_ARM64)
6387 /*
6388 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6389 * return raisexcpt();
6390 * ----
6391 * mov x1, 0x800000000000
6392 * add x1, x0, x1
6393 * cmp xzr, x1, lsr 48
6394 * b.ne .Lraisexcpt
6395 */
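    /* Same wrap-around trick as the AMD64 variant above, just done in 64 bits:
       adding bit 47 leaves bits 63:48 of the sum clear exactly when the address
       is canonical, so a non-zero result after shifting right by 48 raises #GP(0). */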
6396 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6397
6398 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6399 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6400 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6401 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6402
6403 iemNativeRegFreeTmp(pReNative, iTmpReg);
6404
6405#else
6406# error "Port me"
6407#endif
6408 return off;
6409}
6410
6411
6412/**
 6413 * Emits code to check that the content of @a idxAddrReg is within the limit
6414 * of CS, raising a \#GP(0) if it isn't.
6415 *
6416 * @returns New code buffer offset; throws VBox status code on error.
6417 * @param pReNative The native recompile state.
6418 * @param off The code buffer offset.
6419 * @param idxAddrReg The host register (32-bit) with the address to
6420 * check.
6421 * @param idxInstr The current instruction.
6422 */
6423DECL_HIDDEN_THROW(uint32_t)
6424iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6425 uint8_t idxAddrReg, uint8_t idxInstr)
6426{
6427 /*
6428 * Make sure we don't have any outstanding guest register writes as we may
6429 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
6430 */
6431 off = iemNativeRegFlushPendingWrites(pReNative, off);
6432
6433#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6434 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6435#else
6436 RT_NOREF(idxInstr);
6437#endif
6438
6439 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6440 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6441 kIemNativeGstRegUse_ReadOnly);
6442
6443 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6444 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6445
6446 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6447 return off;
6448}
6449
6450
6451/**
6452 * Emits a call to a CImpl function or something similar.
6453 */
6454DECL_HIDDEN_THROW(uint32_t)
6455iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6456 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6457{
6458 /* Writeback everything. */
6459 off = iemNativeRegFlushPendingWrites(pReNative, off);
6460
6461 /*
 6462 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6463 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6464 */
6465 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6466 fGstShwFlush
6467 | RT_BIT_64(kIemNativeGstReg_Pc)
6468 | RT_BIT_64(kIemNativeGstReg_EFlags));
6469 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6470
6471 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6472
6473 /*
6474 * Load the parameters.
6475 */
6476#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
 6477 /* Special-case the hidden VBOXSTRICTRC return pointer. */
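    /* (With VBOXSTRICTRC_STRICT_ENABLED the return type is a class, which the
       Windows/AMD64 ABI returns via a hidden buffer pointer in the first argument
       register; the explicit arguments therefore shift one position to the right.) */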
6478 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6479 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6480 if (cAddParams > 0)
6481 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6482 if (cAddParams > 1)
6483 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6484 if (cAddParams > 2)
6485 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6486 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6487
6488#else
6489 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6490 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6491 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6492 if (cAddParams > 0)
6493 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6494 if (cAddParams > 1)
6495 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6496 if (cAddParams > 2)
6497# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6498 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6499# else
6500 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6501# endif
6502#endif
6503
6504 /*
6505 * Make the call.
6506 */
6507 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6508
6509#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6510 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6511#endif
6512
6513 /*
6514 * Check the status code.
6515 */
6516 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6517}
6518
6519
6520/**
6521 * Emits a call to a threaded worker function.
6522 */
6523DECL_HIDDEN_THROW(uint32_t)
6524iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6525{
6526 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6527
6528 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6529 off = iemNativeRegFlushPendingWrites(pReNative, off);
6530
6531 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6532 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6533
6534#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6535 /* The threaded function may throw / long jmp, so set current instruction
6536 number if we're counting. */
6537 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6538#endif
6539
6540 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6541
6542#ifdef RT_ARCH_AMD64
6543 /* Load the parameters and emit the call. */
6544# ifdef RT_OS_WINDOWS
6545# ifndef VBOXSTRICTRC_STRICT_ENABLED
6546 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6547 if (cParams > 0)
6548 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6549 if (cParams > 1)
6550 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6551 if (cParams > 2)
6552 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6553# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6554 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6555 if (cParams > 0)
6556 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6557 if (cParams > 1)
6558 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6559 if (cParams > 2)
6560 {
6561 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6562 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6563 }
6564 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6565# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6566# else
6567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6568 if (cParams > 0)
6569 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6570 if (cParams > 1)
6571 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6572 if (cParams > 2)
6573 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6574# endif
6575
6576 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6577
6578# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6579 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6580# endif
6581
6582#elif RT_ARCH_ARM64
6583 /*
6584 * ARM64:
6585 */
6586 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6587 if (cParams > 0)
6588 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6589 if (cParams > 1)
6590 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6591 if (cParams > 2)
6592 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6593
6594 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6595
6596#else
6597# error "port me"
6598#endif
6599
6600 /*
6601 * Check the status code.
6602 */
6603 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6604
6605 return off;
6606}
6607
6608#ifdef VBOX_WITH_STATISTICS
6609/**
 6610 * Emits code to update the threaded call statistics.
6611 */
6612DECL_INLINE_THROW(uint32_t)
6613iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6614{
6615 /*
6616 * Update threaded function stats.
6617 */
6618 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6619 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
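    /* Each threaded function has its own 32-bit counter in the VMCPU structure,
       so the increment can be emitted with an offset fixed at recompile time. */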
6620# if defined(RT_ARCH_ARM64)
6621 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6622 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6623 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6624 iemNativeRegFreeTmp(pReNative, idxTmp1);
6625 iemNativeRegFreeTmp(pReNative, idxTmp2);
6626# else
6627 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6628# endif
6629 return off;
6630}
6631#endif /* VBOX_WITH_STATISTICS */
6632
6633
6634/**
6635 * Emits the code at the ReturnWithFlags label (returns
6636 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6637 */
6638static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6639{
6640 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6641 if (idxLabel != UINT32_MAX)
6642 {
6643 iemNativeLabelDefine(pReNative, idxLabel, off);
6644
6645 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6646
6647 /* jump back to the return sequence. */
6648 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6649 }
6650 return off;
6651}
6652
6653
6654/**
6655 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6656 */
6657static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6658{
6659 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6660 if (idxLabel != UINT32_MAX)
6661 {
6662 iemNativeLabelDefine(pReNative, idxLabel, off);
6663
6664 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6665
6666 /* jump back to the return sequence. */
6667 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6668 }
6669 return off;
6670}
6671
6672
6673/**
6674 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6675 */
6676static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6677{
6678 /*
6679 * Generate the rc + rcPassUp fiddling code if needed.
6680 */
6681 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6682 if (idxLabel != UINT32_MAX)
6683 {
6684 iemNativeLabelDefine(pReNative, idxLabel, off);
6685
6686 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6687#ifdef RT_ARCH_AMD64
6688# ifdef RT_OS_WINDOWS
6689# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6690 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6691# endif
6692 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6693 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6694# else
6695 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6696 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6697# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6698 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6699# endif
6700# endif
6701# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6702 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6703# endif
6704
6705#else
6706 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6707 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6708 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6709#endif
6710
6711 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6712 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6713 }
6714 return off;
6715}
6716
6717
6718/**
6719 * Emits a standard epilog.
6720 */
6721static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6722{
6723 *pidxReturnLabel = UINT32_MAX;
6724
6725 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6726 off = iemNativeRegFlushPendingWrites(pReNative, off);
6727
6728 /*
6729 * Successful return, so clear the return register (eax, w0).
6730 */
 6731 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6732
6733 /*
6734 * Define label for common return point.
6735 */
6736 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6737 *pidxReturnLabel = idxReturn;
6738
6739 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6740
6741 /*
6742 * Restore registers and return.
6743 */
6744#ifdef RT_ARCH_AMD64
6745 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6746
 6747 /* Reposition rsp at the r15 restore point. */
6748 pbCodeBuf[off++] = X86_OP_REX_W;
6749 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6750 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6751 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6752
6753 /* Pop non-volatile registers and return */
6754 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6755 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6756 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6757 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6758 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6759 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6760 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6761 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6762# ifdef RT_OS_WINDOWS
6763 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6764 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6765# endif
6766 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6767 pbCodeBuf[off++] = 0xc9; /* leave */
6768 pbCodeBuf[off++] = 0xc3; /* ret */
6769 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6770
6771#elif RT_ARCH_ARM64
6772 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6773
 6774 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6775 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6776 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6777 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6778 IEMNATIVE_FRAME_VAR_SIZE / 8);
6779 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6780 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6781 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6782 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6783 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6784 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6785 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6786 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6787 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6788 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6789 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6790 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6791
6792 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6793 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6794 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6795 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6796
6797 /* retab / ret */
6798# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6799 if (1)
6800 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6801 else
6802# endif
6803 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6804
6805#else
6806# error "port me"
6807#endif
6808 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6809
6810 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6811}
6812
6813
6814/**
6815 * Emits a standard prolog.
6816 */
6817static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6818{
6819#ifdef RT_ARCH_AMD64
6820 /*
6821 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6822 * reserving 64 bytes for stack variables plus 4 non-register argument
 6823 * slots. Fixed register assignment: xBX = pVCpu;
6824 *
6825 * Since we always do the same register spilling, we can use the same
6826 * unwind description for all the code.
6827 */
6828 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6829 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6830 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6831 pbCodeBuf[off++] = 0x8b;
6832 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6833 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6834 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6835# ifdef RT_OS_WINDOWS
6836 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6837 pbCodeBuf[off++] = 0x8b;
6838 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6839 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6840 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6841# else
6842 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6843 pbCodeBuf[off++] = 0x8b;
6844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6845# endif
6846 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6847 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6848 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6849 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6850 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6851 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6852 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6853 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6854
6855# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6856 /* Save the frame pointer. */
6857 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6858# endif
6859
6860 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6861 X86_GREG_xSP,
6862 IEMNATIVE_FRAME_ALIGN_SIZE
6863 + IEMNATIVE_FRAME_VAR_SIZE
6864 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6865 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6866 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6867 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6868 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6869
6870#elif RT_ARCH_ARM64
6871 /*
6872 * We set up a stack frame exactly like on x86, only we have to push the
 6873 * return address ourselves here. We save all non-volatile registers.
6874 */
6875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6876
 6877# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
 6878 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
 6879 * definitely the dwarf stepping code, but so far it's been very tedious to figure out whether it's
 6880 * in any way conditional, so just emit this instruction for now and hope for the best... */
6881 /* pacibsp */
6882 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6883# endif
6884
6885 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6886 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6887 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6888 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6889 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6890 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6891 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6892 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6893 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6894 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6895 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6896 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6897 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6898 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6899 /* Save the BP and LR (ret address) registers at the top of the frame. */
6900 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6901 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6902 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6903 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6904 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6905 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6906
6907 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6908 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6909
6910 /* mov r28, r0 */
6911 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6912 /* mov r27, r1 */
6913 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6914
6915# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6916 /* Save the frame pointer. */
6917 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6918 ARMV8_A64_REG_X2);
6919# endif
6920
6921#else
6922# error "port me"
6923#endif
6924 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6925 return off;
6926}
6927
6928
6929/*********************************************************************************************************************************
6930* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6931*********************************************************************************************************************************/
6932
6933/**
6934 * Internal work that allocates a variable with kind set to
6935 * kIemNativeVarKind_Invalid and no current stack allocation.
6936 *
6937 * The kind will either be set by the caller or later when the variable is first
6938 * assigned a value.
6939 *
6940 * @returns Unpacked index.
6941 * @internal
6942 */
6943static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6944{
6945 Assert(cbType > 0 && cbType <= 64);
6946 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6947 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6948 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6949 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6950 pReNative->Core.aVars[idxVar].cbVar = cbType;
6951 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6952 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6953 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6954 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6955 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6956 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6957 pReNative->Core.aVars[idxVar].u.uValue = 0;
6958#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6959 pReNative->Core.aVars[idxVar].fSimdReg = false;
6960#endif
6961 return idxVar;
6962}
6963
6964
6965/**
6966 * Internal work that allocates an argument variable w/o setting enmKind.
6967 *
6968 * @returns Unpacked index.
6969 * @internal
6970 */
6971static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6972{
6973 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6974 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6975 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6976
6977 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6978 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6979 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6980 return idxVar;
6981}
6982
6983
6984/**
6985 * Gets the stack slot for a stack variable, allocating one if necessary.
6986 *
6987 * Calling this function implies that the stack slot will contain a valid
6988 * variable value. The caller deals with any register currently assigned to the
6989 * variable, typically by spilling it into the stack slot.
6990 *
6991 * @returns The stack slot number.
6992 * @param pReNative The recompiler state.
6993 * @param idxVar The variable.
6994 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6995 */
6996DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6997{
6998 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6999 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7000 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7001
7002 /* Already got a slot? */
7003 uint8_t const idxStackSlot = pVar->idxStackSlot;
7004 if (idxStackSlot != UINT8_MAX)
7005 {
7006 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7007 return idxStackSlot;
7008 }
7009
7010 /*
7011 * A single slot is easy to allocate.
7012 * Allocate them from the top end, closest to BP, to reduce the displacement.
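     * (Presumably a smaller displacement also helps keep the addressing encodings compact,
     *  e.g. 8-bit displacements on AMD64.)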
7013 */
7014 if (pVar->cbVar <= sizeof(uint64_t))
7015 {
7016 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7017 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7018 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7019 pVar->idxStackSlot = (uint8_t)iSlot;
7020        Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7021 return (uint8_t)iSlot;
7022 }
7023
7024 /*
7025 * We need more than one stack slot.
7026 *
7027 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
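     *
     * Worked example (illustrative): for cbVar=32 this gives
     *      fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 0x3 (4 slot alignment),
     *      fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1            = RT_BIT_32(4) - 1 = 0xf (4 slots needed).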
7028 */
7029 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7030 Assert(pVar->cbVar <= 64);
7031 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7032 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7033 uint32_t bmStack = ~pReNative->Core.bmStack;
7034 while (bmStack != 0)
7035 {
7036/** @todo allocate from the top to reduce BP displacement. */
7037 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7038 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7039 if (!(iSlot & fBitAlignMask))
7040 {
7041 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7042 {
7043 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7044 pVar->idxStackSlot = (uint8_t)iSlot;
7045                Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7046 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7047 return (uint8_t)iSlot;
7048 }
7049 }
7050 bmStack &= ~(fBitAlignMask << (iSlot & ~fBitAlignMask));
7051 }
7052 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7053}
7054
7055
7056/**
7057 * Changes the variable to a stack variable.
7058 *
7059 * Currently this is only possible to do the first time the variable is used;
7060 * switching later can be implemented but is not done.
7061 *
7062 * @param pReNative The recompiler state.
7063 * @param idxVar The variable.
7064 * @throws VERR_IEM_VAR_IPE_2
7065 */
7066DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7067{
7068 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7069 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7070 if (pVar->enmKind != kIemNativeVarKind_Stack)
7071 {
7072 /* We could in theory transition from immediate to stack as well, but it
7073           would involve the caller doing the work of storing the value on the stack. So,
7074           till that's required we only allow transitions from invalid. */
7075 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7076 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7077 pVar->enmKind = kIemNativeVarKind_Stack;
7078
7079 /* Note! We don't allocate a stack slot here, that's only done when a
7080 slot is actually needed to hold a variable value. */
7081 }
7082}
7083
7084
7085/**
7086 * Sets the variable to a constant value.
7087 *
7088 * This does not require stack storage as we know the value and can always
7089 * reload it, unless of course it's referenced.
7090 *
7091 * @param pReNative The recompiler state.
7092 * @param idxVar The variable.
7093 * @param uValue The immediate value.
7094 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7095 */
7096DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7097{
7098 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7099 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7100 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7101 {
7102 /* Only simple transitions for now. */
7103 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7104 pVar->enmKind = kIemNativeVarKind_Immediate;
7105 }
7106 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7107
7108 pVar->u.uValue = uValue;
7109 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7110 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7111 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7112}
7113
7114
7115/**
7116 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7117 *
7118 * This does not require stack storage as we know the value and can always
7119 * reload it. Loading is postponed till needed.
7120 *
7121 * @param pReNative The recompiler state.
7122 * @param idxVar The variable. Unpacked.
7123 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7124 *
7125 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7126 * @internal
7127 */
7128static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7129{
7130 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7131 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7132
7133 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7134 {
7135 /* Only simple transitions for now. */
7136 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7137 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7138 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7139 }
7140 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7141
7142 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7143
7144 /* Update the other variable, ensure it's a stack variable. */
7145 /** @todo handle variables with const values... that'll go boom now. */
7146 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7147 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7148}
7149
7150
7151/**
7152 * Sets the variable to a reference (pointer) to a guest register reference.
7153 *
7154 * This does not require stack storage as we know the value and can always
7155 * reload it. Loading is postponed till needed.
7156 *
7157 * @param pReNative The recompiler state.
7158 * @param idxVar The variable.
7159 * @param   enmRegClass     The class of guest registers to reference.
7160 * @param idxReg The register within @a enmRegClass to reference.
7161 *
7162 * @throws VERR_IEM_VAR_IPE_2
7163 */
7164DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7165 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7166{
7167 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7168 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7169
7170 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7171 {
7172 /* Only simple transitions for now. */
7173 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7174 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7175 }
7176 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7177
7178 pVar->u.GstRegRef.enmClass = enmRegClass;
7179 pVar->u.GstRegRef.idx = idxReg;
7180}
7181
7182
7183DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7184{
7185 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7186}
7187
7188
7189DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7190{
7191 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7192
7193 /* Since we're using a generic uint64_t value type, we must truncate it if
7194       the variable is smaller, otherwise we may end up with a too large value when
7195       scaling up an imm8 w/ sign-extension.
7196
7197 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7198       in the bios, bx=1) when running on arm, because clang expects 16-bit
7199 register parameters to have bits 16 and up set to zero. Instead of
7200 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7201 CF value in the result. */
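    /* (Illustration: cbType=2 with a sign-extended uValue of 0xffffffffffffffff is masked down to 0xffff below.) */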
7202 switch (cbType)
7203 {
7204 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7205 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7206 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7207 }
7208 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7209 return idxVar;
7210}
7211
7212
7213DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7214{
7215 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7216 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7217 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7218 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7219 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7220 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7221
7222 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7223 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7224 return idxArgVar;
7225}
7226
7227
7228DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7229{
7230 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7231 /* Don't set to stack now, leave that to the first use as for instance
7232 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7233 return idxVar;
7234}
7235
7236
7237DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7238{
7239 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7240
7241 /* Since we're using a generic uint64_t value type, we must truncate it if
7242       the variable is smaller, otherwise we may end up with a too large value when
7243       scaling up an imm8 w/ sign-extension. */
7244 switch (cbType)
7245 {
7246 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7247 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7248 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7249 }
7250 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7251 return idxVar;
7252}
7253
7254
7255/**
7256 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7257 * fixed till we call iemNativeVarRegisterRelease.
7258 *
7259 * @returns The host register number.
7260 * @param pReNative The recompiler state.
7261 * @param idxVar The variable.
7262 * @param poff Pointer to the instruction buffer offset.
7263 * In case a register needs to be freed up or the value
7264 * loaded off the stack.
7265 * @param fInitialized Set if the variable must already have been initialized.
7266 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7267 * the case.
7268 * @param idxRegPref Preferred register number or UINT8_MAX.
7269 */
7270DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7271 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7272{
7273 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7274 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7275 Assert(pVar->cbVar <= 8);
7276 Assert(!pVar->fRegAcquired);
7277
7278 uint8_t idxReg = pVar->idxReg;
7279 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7280 {
7281 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7282 && pVar->enmKind < kIemNativeVarKind_End);
7283 pVar->fRegAcquired = true;
7284 return idxReg;
7285 }
7286
7287 /*
7288 * If the kind of variable has not yet been set, default to 'stack'.
7289 */
7290 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7291 && pVar->enmKind < kIemNativeVarKind_End);
7292 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7293 iemNativeVarSetKindToStack(pReNative, idxVar);
7294
7295 /*
7296     * We have to allocate a register for the variable, even if it's a stack one,
7297     * as we don't know whether modifications are being made to it before it is
7298     * finalized (todo: analyze and insert hints about that?).
7299     *
7300     * If we can, we try to get the correct register for argument variables. This
7301     * assumes that most argument variables are fetched as close as possible
7302     * to the actual call, so that there aren't any interfering hidden calls
7303     * (memory accesses, etc.) in between.
7304     *
7305     * If we cannot, or it's a local variable, we make sure no argument registers
7306     * that will be used by this MC block are allocated here, and we always
7307     * prefer non-volatile registers to avoid having to spill stuff for internal
7308     * calls.
7309 */
7310 /** @todo Detect too early argument value fetches and warn about hidden
7311 * calls causing less optimal code to be generated in the python script. */
7312
7313 uint8_t const uArgNo = pVar->uArgNo;
7314 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7315 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7316 {
7317 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7318 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7319 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7320 }
7321 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7322 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7323 {
7324 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
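        /* Candidate registers: currently unallocated, not shadowing any guest register (so nothing
           needs flushing), not fixed, and not one of the argument registers this MC block's call will use. */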
7325 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7326 & ~pReNative->Core.bmHstRegsWithGstShadow
7327 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7328 & fNotArgsMask;
7329 if (fRegs)
7330 {
7331 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7332 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7333 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7334 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7335 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7336 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7337 }
7338 else
7339 {
7340 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7341 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7342 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7343 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7344 }
7345 }
7346 else
7347 {
7348 idxReg = idxRegPref;
7349 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7350 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7351 }
7352 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7353 pVar->idxReg = idxReg;
7354
7355#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7356 pVar->fSimdReg = false;
7357#endif
7358
7359 /*
7360 * Load it off the stack if we've got a stack slot.
7361 */
7362 uint8_t const idxStackSlot = pVar->idxStackSlot;
7363 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7364 {
7365 Assert(fInitialized);
7366 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7367 switch (pVar->cbVar)
7368 {
7369 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7370 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7371 case 3: AssertFailed(); RT_FALL_THRU();
7372 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7373 default: AssertFailed(); RT_FALL_THRU();
7374 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7375 }
7376 }
7377 else
7378 {
7379 Assert(idxStackSlot == UINT8_MAX);
7380 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7381 }
7382 pVar->fRegAcquired = true;
7383 return idxReg;
7384}
7385
7386
7387#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7388/**
7389 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7390 * fixed till we call iemNativeVarRegisterRelease.
7391 *
7392 * @returns The host register number.
7393 * @param pReNative The recompiler state.
7394 * @param idxVar The variable.
7395 * @param poff Pointer to the instruction buffer offset.
7396 * In case a register needs to be freed up or the value
7397 * loaded off the stack.
7398 * @param fInitialized Set if the variable must already have been initialized.
7399 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7400 * the case.
7401 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7402 */
7403DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7404 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7405{
7406 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7407 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7408 Assert( pVar->cbVar == sizeof(RTUINT128U)
7409 || pVar->cbVar == sizeof(RTUINT256U));
7410 Assert(!pVar->fRegAcquired);
7411
7412 uint8_t idxReg = pVar->idxReg;
7413 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7414 {
7415 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7416 && pVar->enmKind < kIemNativeVarKind_End);
7417 pVar->fRegAcquired = true;
7418 return idxReg;
7419 }
7420
7421 /*
7422 * If the kind of variable has not yet been set, default to 'stack'.
7423 */
7424 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7425 && pVar->enmKind < kIemNativeVarKind_End);
7426 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7427 iemNativeVarSetKindToStack(pReNative, idxVar);
7428
7429 /*
7430     * We have to allocate a register for the variable, even if it's a stack one,
7431     * as we don't know whether modifications are being made to it before it is
7432     * finalized (todo: analyze and insert hints about that?).
7433     *
7434     * If we can, we try to get the correct register for argument variables. This
7435     * assumes that most argument variables are fetched as close as possible
7436     * to the actual call, so that there aren't any interfering hidden calls
7437     * (memory accesses, etc.) in between.
7438     *
7439     * If we cannot, or it's a local variable, we make sure no argument registers
7440     * that will be used by this MC block are allocated here, and we always
7441     * prefer non-volatile registers to avoid having to spill stuff for internal
7442     * calls.
7443 */
7444 /** @todo Detect too early argument value fetches and warn about hidden
7445 * calls causing less optimal code to be generated in the python script. */
7446
7447 uint8_t const uArgNo = pVar->uArgNo;
7448 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7449
7450    /* SIMD is a bit simpler for now because there is no support for arguments. */
7451 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7452 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7453 {
7454 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7455 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7456 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7457 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7458 & fNotArgsMask;
7459 if (fRegs)
7460 {
7461 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7462 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7463 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7464 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7465 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7466 }
7467 else
7468 {
7469 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7470 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7471 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7472 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7473 }
7474 }
7475 else
7476 {
7477 idxReg = idxRegPref;
7478 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7479 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7480 }
7481 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7482
7483 pVar->fSimdReg = true;
7484 pVar->idxReg = idxReg;
7485
7486 /*
7487 * Load it off the stack if we've got a stack slot.
7488 */
7489 uint8_t const idxStackSlot = pVar->idxStackSlot;
7490 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7491 {
7492 Assert(fInitialized);
7493 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7494 switch (pVar->cbVar)
7495 {
7496 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7497 default: AssertFailed(); RT_FALL_THRU();
7498 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7499 }
7500 }
7501 else
7502 {
7503 Assert(idxStackSlot == UINT8_MAX);
7504 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7505 }
7506 pVar->fRegAcquired = true;
7507 return idxReg;
7508}
7509#endif
7510
7511
7512/**
7513 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7514 * guest register.
7515 *
7516 * This function makes sure there is a register for it and sets it to be the
7517 * current shadow copy of @a enmGstReg.
7518 *
7519 * @returns The host register number.
7520 * @param pReNative The recompiler state.
7521 * @param idxVar The variable.
7522 * @param enmGstReg The guest register this variable will be written to
7523 * after this call.
7524 * @param poff Pointer to the instruction buffer offset.
7525 * In case a register needs to be freed up or if the
7526 * variable content needs to be loaded off the stack.
7527 *
7528 * @note We DO NOT expect @a idxVar to be an argument variable,
7529 *          because this function is only used in the commit stage of an
7530 *          instruction.
7531 */
7532DECL_HIDDEN_THROW(uint8_t)
7533iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7534{
7535 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7536 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7537 Assert(!pVar->fRegAcquired);
7538 AssertMsgStmt( pVar->cbVar <= 8
7539 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7540 || pVar->enmKind == kIemNativeVarKind_Stack),
7541 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7542 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7543 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7544
7545 /*
7546 * This shouldn't ever be used for arguments, unless it's in a weird else
7547 * branch that doesn't do any calling and even then it's questionable.
7548 *
7549 * However, in case someone writes crazy wrong MC code and does register
7550 * updates before making calls, just use the regular register allocator to
7551 * ensure we get a register suitable for the intended argument number.
7552 */
7553 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7554
7555 /*
7556 * If there is already a register for the variable, we transfer/set the
7557 * guest shadow copy assignment to it.
7558 */
7559 uint8_t idxReg = pVar->idxReg;
7560 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7561 {
7562 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7563 {
7564 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7565 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7566 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7567 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7568 }
7569 else
7570 {
7571 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7572 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7573 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7574 }
7575 /** @todo figure this one out. We need some way of making sure the register isn't
7576 * modified after this point, just in case we start writing crappy MC code. */
7577 pVar->enmGstReg = enmGstReg;
7578 pVar->fRegAcquired = true;
7579 return idxReg;
7580 }
7581 Assert(pVar->uArgNo == UINT8_MAX);
7582
7583 /*
7584     * Because this is supposed to be the commit stage, we just tag along with the
7585 * temporary register allocator and upgrade it to a variable register.
7586 */
7587 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7588 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7589 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7590 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7591 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7592 pVar->idxReg = idxReg;
7593
7594 /*
7595 * Now we need to load the register value.
7596 */
7597 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7598 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7599 else
7600 {
7601 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7602 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7603 switch (pVar->cbVar)
7604 {
7605 case sizeof(uint64_t):
7606 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7607 break;
7608 case sizeof(uint32_t):
7609 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7610 break;
7611 case sizeof(uint16_t):
7612 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7613 break;
7614 case sizeof(uint8_t):
7615 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7616 break;
7617 default:
7618 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7619 }
7620 }
7621
7622 pVar->fRegAcquired = true;
7623 return idxReg;
7624}
7625
7626
7627/**
7628 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7629 *
7630 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7631 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7632 * requirement of flushing anything in volatile host registers when making a
7633 * call.
7634 *
7635 * @returns New @a off value.
7636 * @param pReNative The recompiler state.
7637 * @param off The code buffer position.
7638 * @param fHstRegsNotToSave Set of registers not to save & restore.
7639 */
7640DECL_HIDDEN_THROW(uint32_t)
7641iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7642{
7643 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7644 if (fHstRegs)
7645 {
7646 do
7647 {
7648 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7649 fHstRegs &= ~RT_BIT_32(idxHstReg);
7650
7651 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7652 {
7653 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7654 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7655 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7656 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7657 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7658 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7659 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7660 {
7661 case kIemNativeVarKind_Stack:
7662 {
7663 /* Temporarily spill the variable register. */
7664 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7665 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7666 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7667 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7668 continue;
7669 }
7670
7671 case kIemNativeVarKind_Immediate:
7672 case kIemNativeVarKind_VarRef:
7673 case kIemNativeVarKind_GstRegRef:
7674 /* It is weird to have any of these loaded at this point. */
7675 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7676 continue;
7677
7678 case kIemNativeVarKind_End:
7679 case kIemNativeVarKind_Invalid:
7680 break;
7681 }
7682 AssertFailed();
7683 }
7684 else
7685 {
7686 /*
7687 * Allocate a temporary stack slot and spill the register to it.
7688 */
7689 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7690 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7691 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7692 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7693 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7694 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7695 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7696 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7697 }
7698 } while (fHstRegs);
7699 }
7700#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7701 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7702 if (fHstRegs)
7703 {
7704 do
7705 {
7706 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7707 fHstRegs &= ~RT_BIT_32(idxHstReg);
7708
7709 /*
7710             * Guest registers are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7711             * which would be more difficult anyway since they span multiple stack slots and come in different
7712             * sizes (besides, we only have a limited amount of slots at the moment). Fixed temporary registers
7713             * don't need saving.
7714 */
7715 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7716 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7717 continue;
7718
7719 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7720
7721 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7723 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7724 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7725 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7726 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7727 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7728 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7729 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7730 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7731 {
7732 case kIemNativeVarKind_Stack:
7733 {
7734 /* Temporarily spill the variable register. */
7735 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7736 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7737 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7738 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7739 if (cbVar == sizeof(RTUINT128U))
7740 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7741 else
7742 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7743 continue;
7744 }
7745
7746 case kIemNativeVarKind_Immediate:
7747 case kIemNativeVarKind_VarRef:
7748 case kIemNativeVarKind_GstRegRef:
7749 /* It is weird to have any of these loaded at this point. */
7750 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7751 continue;
7752
7753 case kIemNativeVarKind_End:
7754 case kIemNativeVarKind_Invalid:
7755 break;
7756 }
7757 AssertFailed();
7758 } while (fHstRegs);
7759 }
7760#endif
7761 return off;
7762}
7763
7764
7765/**
7766 * Emit code to restore volatile registers after a call to a helper.
7767 *
7768 * @returns New @a off value.
7769 * @param pReNative The recompiler state.
7770 * @param off The code buffer position.
7771 * @param fHstRegsNotToSave Set of registers not to save & restore.
7772 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7773 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7774 */
7775DECL_HIDDEN_THROW(uint32_t)
7776iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7777{
7778 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7779 if (fHstRegs)
7780 {
7781 do
7782 {
7783 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7784 fHstRegs &= ~RT_BIT_32(idxHstReg);
7785
7786 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7787 {
7788 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7789 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7790 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7791 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7792 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7793 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7794 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7795 {
7796 case kIemNativeVarKind_Stack:
7797 {
7798 /* Unspill the variable register. */
7799 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7800 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7801 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7802 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7803 continue;
7804 }
7805
7806 case kIemNativeVarKind_Immediate:
7807 case kIemNativeVarKind_VarRef:
7808 case kIemNativeVarKind_GstRegRef:
7809 /* It is weird to have any of these loaded at this point. */
7810 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7811 continue;
7812
7813 case kIemNativeVarKind_End:
7814 case kIemNativeVarKind_Invalid:
7815 break;
7816 }
7817 AssertFailed();
7818 }
7819 else
7820 {
7821 /*
7822 * Restore from temporary stack slot.
7823 */
7824 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7825 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7826 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7827 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7828
7829 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7830 }
7831 } while (fHstRegs);
7832 }
7833#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7834 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7835 if (fHstRegs)
7836 {
7837 do
7838 {
7839 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7840 fHstRegs &= ~RT_BIT_32(idxHstReg);
7841
7842 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7843 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7844 continue;
7845 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7846
7847            uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar; /* (SIMD register table, matching the save path above.) */
7848 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7849 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7850 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7851 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7852 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7853 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7854 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7855 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7856 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7857 {
7858 case kIemNativeVarKind_Stack:
7859 {
7860 /* Unspill the variable register. */
7861 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7862 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7863 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7864 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7865
7866 if (cbVar == sizeof(RTUINT128U))
7867 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7868 else
7869 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7870 continue;
7871 }
7872
7873 case kIemNativeVarKind_Immediate:
7874 case kIemNativeVarKind_VarRef:
7875 case kIemNativeVarKind_GstRegRef:
7876 /* It is weird to have any of these loaded at this point. */
7877 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7878 continue;
7879
7880 case kIemNativeVarKind_End:
7881 case kIemNativeVarKind_Invalid:
7882 break;
7883 }
7884 AssertFailed();
7885 } while (fHstRegs);
7886 }
7887#endif
7888 return off;
7889}
7890
7891
7892/**
7893 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7894 *
7895 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7896 *
7897 * ASSUMES that @a idxVar is valid and unpacked.
7898 */
7899DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7900{
7901 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7902 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7903 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7904 {
7905 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7906 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7907 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
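        /* E.g. cbVar=32 -> cSlots=4 -> fAllocMask=0xf, i.e. the same mask iemNativeVarGetStackSlot set when allocating. */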
7908 Assert(cSlots > 0);
7909 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7910 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7911 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7912 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7913 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7914 }
7915 else
7916 Assert(idxStackSlot == UINT8_MAX);
7917}
7918
7919
7920/**
7921 * Worker that frees a single variable.
7922 *
7923 * ASSUMES that @a idxVar is valid and unpacked.
7924 */
7925DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7926{
7927 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7928 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7929 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7930
7931 /* Free the host register first if any assigned. */
7932 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7933#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7934 if ( idxHstReg != UINT8_MAX
7935 && pReNative->Core.aVars[idxVar].fSimdReg)
7936 {
7937 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7938 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7939 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7940 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7941 }
7942 else
7943#endif
7944 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7945 {
7946 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7947 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7948 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7949 }
7950
7951 /* Free argument mapping. */
7952 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7953 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7954 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7955
7956 /* Free the stack slots. */
7957 iemNativeVarFreeStackSlots(pReNative, idxVar);
7958
7959 /* Free the actual variable. */
7960 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7961 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7962}
7963
7964
7965/**
7966 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7967 */
7968DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7969{
7970 while (bmVars != 0)
7971 {
7972 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7973 bmVars &= ~RT_BIT_32(idxVar);
7974
7975#if 1 /** @todo optimize by simplifying this later... */
7976 iemNativeVarFreeOneWorker(pReNative, idxVar);
7977#else
7978 /* Only need to free the host register, the rest is done as bulk updates below. */
7979 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7980 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7981 {
7982 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7983 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7984 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7985 }
7986#endif
7987 }
7988#if 0 /** @todo optimize by simplifying this later... */
7989 pReNative->Core.bmVars = 0;
7990 pReNative->Core.bmStack = 0;
7991 pReNative->Core.u64ArgVars = UINT64_MAX;
7992#endif
7993}
7994
7995
7996
7997/*********************************************************************************************************************************
7998* Emitters for IEM_MC_CALL_CIMPL_XXX *
7999*********************************************************************************************************************************/
8000
8001/**
8002 * Emits code to load a reference to the given guest register into @a idxGprDst.
8003 */
8004DECL_HIDDEN_THROW(uint32_t)
8005iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8006 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8007{
8008#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8009    /** @todo If we're ever gonna allow referencing the RIP register we need to update the guest value here. */
8010#endif
8011
8012 /*
8013 * Get the offset relative to the CPUMCTX structure.
8014 */
8015 uint32_t offCpumCtx;
8016 switch (enmClass)
8017 {
8018 case kIemNativeGstRegRef_Gpr:
8019 Assert(idxRegInClass < 16);
8020 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8021 break;
8022
8023 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8024 Assert(idxRegInClass < 4);
8025 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8026 break;
8027
8028 case kIemNativeGstRegRef_EFlags:
8029 Assert(idxRegInClass == 0);
8030 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8031 break;
8032
8033 case kIemNativeGstRegRef_MxCsr:
8034 Assert(idxRegInClass == 0);
8035 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8036 break;
8037
8038 case kIemNativeGstRegRef_FpuReg:
8039 Assert(idxRegInClass < 8);
8040 AssertFailed(); /** @todo what kind of indexing? */
8041 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8042 break;
8043
8044 case kIemNativeGstRegRef_MReg:
8045 Assert(idxRegInClass < 8);
8046 AssertFailed(); /** @todo what kind of indexing? */
8047 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8048 break;
8049
8050 case kIemNativeGstRegRef_XReg:
8051 Assert(idxRegInClass < 16);
8052 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8053 break;
8054
8055 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8056 Assert(idxRegInClass == 0);
8057 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8058 break;
8059
8060 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8061 Assert(idxRegInClass == 0);
8062 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8063 break;
8064
8065 default:
8066 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8067 }
8068
8069 /*
8070 * Load the value into the destination register.
8071 */
8072#ifdef RT_ARCH_AMD64
8073 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8074
8075#elif defined(RT_ARCH_ARM64)
8076 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8077 Assert(offCpumCtx < 4096);
8078 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8079
8080#else
8081# error "Port me!"
8082#endif
8083
8084 return off;
8085}
8086
8087
8088/**
8089 * Common code for CIMPL and AIMPL calls.
8090 *
8091 * These are calls that use argument variables and such. They should not be
8092 * confused with internal calls required to implement an MC operation,
8093 * like a TLB load and similar.
8094 *
8095 * Upon return all that is left to do is to load any hidden arguments and
8096 * perform the call. All argument variables are freed.
8097 *
8098 * @returns New code buffer offset; throws VBox status code on error.
8099 * @param pReNative The native recompile state.
8100 * @param off The code buffer offset.
8101 * @param   cArgs           The total number of arguments (includes hidden
8102 * count).
8103 * @param cHiddenArgs The number of hidden arguments. The hidden
8104 * arguments must not have any variable declared for
8105 * them, whereas all the regular arguments must
8106 * (tstIEMCheckMc ensures this).
8107 */
8108DECL_HIDDEN_THROW(uint32_t)
8109iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8110{
8111#ifdef VBOX_STRICT
8112 /*
8113 * Assert sanity.
8114 */
8115 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8116 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8117 for (unsigned i = 0; i < cHiddenArgs; i++)
8118 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8119 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8120 {
8121 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8122 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8123 }
8124 iemNativeRegAssertSanity(pReNative);
8125#endif
8126
8127 /* We don't know what the called function makes use of, so flush any pending register writes. */
8128 off = iemNativeRegFlushPendingWrites(pReNative, off);
8129
8130 /*
8131 * Before we do anything else, go over variables that are referenced and
8132 * make sure they are not in a register.
8133 */
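    /* Rationale: a referenced variable is passed to the callee as the address of its stack slot
       (see the kIemNativeVarKind_VarRef handling below), so its current value must be in that
       slot rather than cached in a host register the callee cannot see. */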
8134 uint32_t bmVars = pReNative->Core.bmVars;
8135 if (bmVars)
8136 {
8137 do
8138 {
8139 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8140 bmVars &= ~RT_BIT_32(idxVar);
8141
8142 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8143 {
8144 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8145#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8146 if ( idxRegOld != UINT8_MAX
8147 && pReNative->Core.aVars[idxVar].fSimdReg)
8148 {
8149 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8150 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8151
8152 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8153 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8154 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8155 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8156 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8157 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8158 else
8159 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8160
8161 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8162 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8163
8164 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8165 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8166 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8167 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8168 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8169 }
8170 else
8171#endif
8172 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8173 {
8174 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8175 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8176 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8177 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8178 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8179
8180 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8181 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8182 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8183 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8184 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8185 }
8186 }
8187 } while (bmVars != 0);
8188#if 0 //def VBOX_STRICT
8189 iemNativeRegAssertSanity(pReNative);
8190#endif
8191 }
8192
8193 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8194
8195 /*
8196 * First, go over the host registers that will be used for arguments and make
8197 * sure they either hold the desired argument or are free.
8198 */
8199 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8200 {
8201 for (uint32_t i = 0; i < cRegArgs; i++)
8202 {
8203 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8204 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8205 {
8206 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8207 {
8208 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8209 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8210 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8211 Assert(pVar->idxReg == idxArgReg);
8212 uint8_t const uArgNo = pVar->uArgNo;
8213 if (uArgNo == i)
8214                    { /* perfect */ }
8215 /* The variable allocator logic should make sure this is impossible,
8216 except for when the return register is used as a parameter (ARM,
8217 but not x86). */
8218#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8219 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8220 {
8221# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8222# error "Implement this"
8223# endif
8224 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8225 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8226 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8227 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8228 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8229 }
8230#endif
8231 else
8232 {
8233 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8234
8235 if (pVar->enmKind == kIemNativeVarKind_Stack)
8236 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8237 else
8238 {
8239 /* just free it, can be reloaded if used again */
8240 pVar->idxReg = UINT8_MAX;
8241 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8242 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8243 }
8244 }
8245 }
8246 else
8247 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8248 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8249 }
8250 }
8251#if 0 //def VBOX_STRICT
8252 iemNativeRegAssertSanity(pReNative);
8253#endif
8254 }
8255
8256 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8257
8258#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8259 /*
8260 * If there are any stack arguments, make sure they are in their place as well.
8261 *
8262     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8263     * the caller) will be loading it later and it must be free (see the first loop).
8264 */
8265 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8266 {
8267 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8268 {
8269 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8270 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8271 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8272 {
8273 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8274 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8275 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8276 pVar->idxReg = UINT8_MAX;
8277 }
8278 else
8279 {
8280 /* Use ARG0 as temp for stuff we need registers for. */
8281 switch (pVar->enmKind)
8282 {
8283 case kIemNativeVarKind_Stack:
8284 {
8285 uint8_t const idxStackSlot = pVar->idxStackSlot;
8286 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8287 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8288 iemNativeStackCalcBpDisp(idxStackSlot));
8289 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8290 continue;
8291 }
8292
8293 case kIemNativeVarKind_Immediate:
8294 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8295 continue;
8296
8297 case kIemNativeVarKind_VarRef:
8298 {
8299 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8300 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8301 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8302 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8303 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8304# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8305 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8306 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8307 if ( fSimdReg
8308 && idxRegOther != UINT8_MAX)
8309 {
8310 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8311 if (cbVar == sizeof(RTUINT128U))
8312 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8313 else
8314 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8315 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8316 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8317 }
8318 else
8319# endif
8320 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8321 {
8322 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8323 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8324 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8325 }
8326 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8327 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8328 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8329 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8330 continue;
8331 }
8332
8333 case kIemNativeVarKind_GstRegRef:
8334 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8335 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8336 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8337 continue;
8338
8339 case kIemNativeVarKind_Invalid:
8340 case kIemNativeVarKind_End:
8341 break;
8342 }
8343 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8344 }
8345 }
8346# if 0 //def VBOX_STRICT
8347 iemNativeRegAssertSanity(pReNative);
8348# endif
8349 }
8350#else
8351 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8352#endif
8353
8354 /*
8355 * Make sure the argument variables are loaded into their respective registers.
8356 *
8357 * We can optimize this by ASSUMING that any register allocations are for
8358     * registers that have already been loaded and are ready. The previous step
8359 * saw to that.
8360 */
8361 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8362 {
8363 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8364 {
8365 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8366 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8367 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8368 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8369 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8370 else
8371 {
8372 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8373 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8374 {
8375 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8376 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8377 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8378 | RT_BIT_32(idxArgReg);
8379 pVar->idxReg = idxArgReg;
8380 }
8381 else
8382 {
8383 /* Use ARG0 as temp for stuff we need registers for. */
8384 switch (pVar->enmKind)
8385 {
8386 case kIemNativeVarKind_Stack:
8387 {
8388 uint8_t const idxStackSlot = pVar->idxStackSlot;
8389 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8390 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8391 continue;
8392 }
8393
8394 case kIemNativeVarKind_Immediate:
8395 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8396 continue;
8397
8398 case kIemNativeVarKind_VarRef:
8399 {
8400 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8401 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8402 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8403 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8404 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8405 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8406#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8407 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8408 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8409 if ( fSimdReg
8410 && idxRegOther != UINT8_MAX)
8411 {
8412 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8413 if (cbVar == sizeof(RTUINT128U))
8414 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8415 else
8416 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8417 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8418 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8419 }
8420 else
8421#endif
8422 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8423 {
8424 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8425 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8426 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8427 }
8428 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8429 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8430 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8431 continue;
8432 }
8433
8434 case kIemNativeVarKind_GstRegRef:
8435 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8436 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8437 continue;
8438
8439 case kIemNativeVarKind_Invalid:
8440 case kIemNativeVarKind_End:
8441 break;
8442 }
8443 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8444 }
8445 }
8446 }
8447#if 0 //def VBOX_STRICT
8448 iemNativeRegAssertSanity(pReNative);
8449#endif
8450 }
8451#ifdef VBOX_STRICT
8452 else
8453 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8454 {
8455 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8456 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8457 }
8458#endif
8459
8460 /*
8461 * Free all argument variables (simplified).
8462 * Their lifetime always expires with the call they are for.
8463 */
8464 /** @todo Make the python script check that arguments aren't used after
8465 * IEM_MC_CALL_XXXX. */
8466    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8467     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8468     *        an argument value. There is also some FPU stuff. */
8469 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8470 {
8471 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8472 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8473
8474 /* no need to free registers: */
8475 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8476 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8477 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8478 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8479 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8480 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8481
8482 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8483 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8484 iemNativeVarFreeStackSlots(pReNative, idxVar);
8485 }
8486 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8487
8488 /*
8489 * Flush volatile registers as we make the call.
8490 */
8491 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8492
8493 return off;
8494}
8495
8496
8497
8498/*********************************************************************************************************************************
8499* TLB Lookup. *
8500*********************************************************************************************************************************/
8501
8502/**
8503 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8504 */
8505DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8506{
8507 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8508 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8509 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8510 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8511
8512 /* Do the lookup manually. */
8513 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8514 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8515 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8516 if (RT_LIKELY(pTlbe->uTag == uTag))
8517 {
8518 /*
8519 * Check TLB page table level access flags.
8520 */
8521 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8522 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8523 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8524 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8525 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8526 | IEMTLBE_F_PG_UNASSIGNED
8527 | IEMTLBE_F_PT_NO_ACCESSED
8528 | fNoWriteNoDirty | fNoUser);
8529 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8530 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8531 {
8532 /*
8533 * Return the address.
8534 */
8535 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8536 if ((uintptr_t)pbAddr == uResult)
8537 return;
8538 RT_NOREF(cbMem);
8539 AssertFailed();
8540 }
8541 else
8542 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8543 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8544 }
8545 else
8546 AssertFailed();
8547 RT_BREAKPOINT();
8548}
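
/* Illustrative sketch (not part of the build): judging from the RT_BYTE1 / RT_BYTE2 and
   ">> 16" unpacking above, uSegAndSizeAndAccess is assumed to be packed by the emitter as
   byte 0 = segment register index (UINT8_MAX for an already flat address), byte 1 = access
   size in bytes, and bits 16 and up = the IEM_ACCESS_XXX flags.  The helper name below is
   hypothetical. */
#if 0
DECLINLINE(uint32_t) iemNativeSketchPackTlbCheckArg(uint8_t iSegReg, uint8_t cbMem, uint32_t fAccess)
{
    return (uint32_t)iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16);
}
#endif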
8549
8550/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8551
8552
8553
8554/*********************************************************************************************************************************
8555* Recompiler Core. *
8556*********************************************************************************************************************************/
8557
8558/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8559static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8560{
8561 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8562 pDis->cbCachedInstr += cbMaxRead;
8563 RT_NOREF(cbMinRead);
8564 return VERR_NO_DATA;
8565}
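
/* (Note: by zero-filling whatever is requested beyond the prefetched opcode bytes and
   returning VERR_NO_DATA, the dummy above ensures the guest code disassembly never touches
   real guest memory; at worst it fails on truncated opcode data.) */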
8566
8567
8568DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8569{
8570 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8571 {
8572#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8573 ENTRY(fLocalForcedActions),
8574 ENTRY(iem.s.rcPassUp),
8575 ENTRY(iem.s.fExec),
8576 ENTRY(iem.s.pbInstrBuf),
8577 ENTRY(iem.s.uInstrBufPc),
8578 ENTRY(iem.s.GCPhysInstrBuf),
8579 ENTRY(iem.s.cbInstrBufTotal),
8580 ENTRY(iem.s.idxTbCurInstr),
8581#ifdef VBOX_WITH_STATISTICS
8582 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8583 ENTRY(iem.s.StatNativeTlbHitsForStore),
8584 ENTRY(iem.s.StatNativeTlbHitsForStack),
8585 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8586 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8587 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8588 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8589 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8590#endif
8591 ENTRY(iem.s.DataTlb.aEntries),
8592 ENTRY(iem.s.DataTlb.uTlbRevision),
8593 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8594 ENTRY(iem.s.DataTlb.cTlbHits),
8595 ENTRY(iem.s.CodeTlb.aEntries),
8596 ENTRY(iem.s.CodeTlb.uTlbRevision),
8597 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8598 ENTRY(iem.s.CodeTlb.cTlbHits),
8599 ENTRY(pVMR3),
8600 ENTRY(cpum.GstCtx.rax),
8601 ENTRY(cpum.GstCtx.ah),
8602 ENTRY(cpum.GstCtx.rcx),
8603 ENTRY(cpum.GstCtx.ch),
8604 ENTRY(cpum.GstCtx.rdx),
8605 ENTRY(cpum.GstCtx.dh),
8606 ENTRY(cpum.GstCtx.rbx),
8607 ENTRY(cpum.GstCtx.bh),
8608 ENTRY(cpum.GstCtx.rsp),
8609 ENTRY(cpum.GstCtx.rbp),
8610 ENTRY(cpum.GstCtx.rsi),
8611 ENTRY(cpum.GstCtx.rdi),
8612 ENTRY(cpum.GstCtx.r8),
8613 ENTRY(cpum.GstCtx.r9),
8614 ENTRY(cpum.GstCtx.r10),
8615 ENTRY(cpum.GstCtx.r11),
8616 ENTRY(cpum.GstCtx.r12),
8617 ENTRY(cpum.GstCtx.r13),
8618 ENTRY(cpum.GstCtx.r14),
8619 ENTRY(cpum.GstCtx.r15),
8620 ENTRY(cpum.GstCtx.es.Sel),
8621 ENTRY(cpum.GstCtx.es.u64Base),
8622 ENTRY(cpum.GstCtx.es.u32Limit),
8623 ENTRY(cpum.GstCtx.es.Attr),
8624 ENTRY(cpum.GstCtx.cs.Sel),
8625 ENTRY(cpum.GstCtx.cs.u64Base),
8626 ENTRY(cpum.GstCtx.cs.u32Limit),
8627 ENTRY(cpum.GstCtx.cs.Attr),
8628 ENTRY(cpum.GstCtx.ss.Sel),
8629 ENTRY(cpum.GstCtx.ss.u64Base),
8630 ENTRY(cpum.GstCtx.ss.u32Limit),
8631 ENTRY(cpum.GstCtx.ss.Attr),
8632 ENTRY(cpum.GstCtx.ds.Sel),
8633 ENTRY(cpum.GstCtx.ds.u64Base),
8634 ENTRY(cpum.GstCtx.ds.u32Limit),
8635 ENTRY(cpum.GstCtx.ds.Attr),
8636 ENTRY(cpum.GstCtx.fs.Sel),
8637 ENTRY(cpum.GstCtx.fs.u64Base),
8638 ENTRY(cpum.GstCtx.fs.u32Limit),
8639 ENTRY(cpum.GstCtx.fs.Attr),
8640 ENTRY(cpum.GstCtx.gs.Sel),
8641 ENTRY(cpum.GstCtx.gs.u64Base),
8642 ENTRY(cpum.GstCtx.gs.u32Limit),
8643 ENTRY(cpum.GstCtx.gs.Attr),
8644 ENTRY(cpum.GstCtx.rip),
8645 ENTRY(cpum.GstCtx.eflags),
8646 ENTRY(cpum.GstCtx.uRipInhibitInt),
8647 ENTRY(cpum.GstCtx.cr0),
8648 ENTRY(cpum.GstCtx.cr4),
8649 ENTRY(cpum.GstCtx.aXcr[0]),
8650 ENTRY(cpum.GstCtx.aXcr[1]),
8651#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8652 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8653 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8654 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8655 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8656 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8657 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8658 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8659 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8660 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8661 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8662 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8663 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8664 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8665 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8666 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8667 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8668 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8669 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8670 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8671 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8672 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8673 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8674 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8675 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8676 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8677 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8678 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8679 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8680 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8681 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8682 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8683 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8684#endif
8685#undef ENTRY
8686 };
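    /* Note: the table above must be kept sorted by strictly ascending member offset, since
       the lookup below is a plain binary search; the VBOX_STRICT block that follows checks
       the ordering once at runtime. */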
8687#ifdef VBOX_STRICT
8688 static bool s_fOrderChecked = false;
8689 if (!s_fOrderChecked)
8690 {
8691 s_fOrderChecked = true;
8692 uint32_t offPrev = s_aMembers[0].off;
8693 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8694 {
8695 Assert(s_aMembers[i].off > offPrev);
8696 offPrev = s_aMembers[i].off;
8697 }
8698 }
8699#endif
8700
8701 /*
8702 * Binary lookup.
8703 */
8704 unsigned iStart = 0;
8705 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8706 for (;;)
8707 {
8708 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8709 uint32_t const offCur = s_aMembers[iCur].off;
8710 if (off < offCur)
8711 {
8712 if (iCur != iStart)
8713 iEnd = iCur;
8714 else
8715 break;
8716 }
8717 else if (off > offCur)
8718 {
8719 if (iCur + 1 < iEnd)
8720 iStart = iCur + 1;
8721 else
8722 break;
8723 }
8724 else
8725 return s_aMembers[iCur].pszName;
8726 }
8727#ifdef VBOX_WITH_STATISTICS
8728 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8729 return "iem.s.acThreadedFuncStats[iFn]";
8730#endif
8731 return NULL;
8732}
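
/* Illustrative usage sketch (not part of the build): the lookup only hits exact member
   offsets; anything in between yields NULL, except for the threaded function statistics
   range when VBOX_WITH_STATISTICS is defined.  The wrapper name is hypothetical. */
#if 0
static void iemNativeSketchDbgNameUsage(void)
{
    const char * const pszName = iemNativeDbgVCpuOffsetToName((uint32_t)RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
    Assert(pszName && strcmp(pszName, "cpum.GstCtx.rip") == 0);
}
#endif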
8733
8734
8735DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8736{
8737 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8738#if defined(RT_ARCH_AMD64)
8739 static const char * const a_apszMarkers[] =
8740 {
8741 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8742 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8743 };
8744#endif
8745
8746 char szDisBuf[512];
8747 DISSTATE Dis;
8748 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8749 uint32_t const cNative = pTb->Native.cInstructions;
8750 uint32_t offNative = 0;
8751#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8752 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8753#endif
8754 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8755 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8756 : DISCPUMODE_64BIT;
8757#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8758 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8759#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8760 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8761#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8762# error "Port me"
8763#else
8764 csh hDisasm = ~(size_t)0;
8765# if defined(RT_ARCH_AMD64)
8766 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8767# elif defined(RT_ARCH_ARM64)
8768 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8769# else
8770# error "Port me"
8771# endif
8772 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8773
8774 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8775 //Assert(rcCs == CS_ERR_OK);
8776#endif
8777
8778 /*
8779 * Print TB info.
8780 */
8781 pHlp->pfnPrintf(pHlp,
8782 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8783 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8784 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8785 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8786#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8787 if (pDbgInfo && pDbgInfo->cEntries > 1)
8788 {
8789 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8790
8791 /*
8792 * This disassembly is driven by the debug info which follows the native
8793         * code and indicates where the next guest instruction starts, where
8794         * labels are, and such things.
8795 */
8796 uint32_t idxThreadedCall = 0;
8797 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8798 uint8_t idxRange = UINT8_MAX;
8799 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8800 uint32_t offRange = 0;
8801 uint32_t offOpcodes = 0;
8802 uint32_t const cbOpcodes = pTb->cbOpcodes;
8803 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8804 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8805 uint32_t iDbgEntry = 1;
8806 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8807
8808 while (offNative < cNative)
8809 {
8810 /* If we're at or have passed the point where the next chunk of debug
8811 info starts, process it. */
8812 if (offDbgNativeNext <= offNative)
8813 {
8814 offDbgNativeNext = UINT32_MAX;
8815 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8816 {
8817 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8818 {
8819 case kIemTbDbgEntryType_GuestInstruction:
8820 {
8821 /* Did the exec flag change? */
8822 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8823 {
8824 pHlp->pfnPrintf(pHlp,
8825 " fExec change %#08x -> %#08x %s\n",
8826 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8827 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8828 szDisBuf, sizeof(szDisBuf)));
8829 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8830 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8831 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8832 : DISCPUMODE_64BIT;
8833 }
8834
8835                        /* New opcode range? We need to handle a spurious debug info entry here for cases
8836 where the compilation was aborted before the opcode was recorded and the actual
8837 instruction was translated to a threaded call. This may happen when we run out
8838 of ranges, or when some complicated interrupts/FFs are found to be pending or
8839 similar. So, we just deal with it here rather than in the compiler code as it
8840 is a lot simpler to do here. */
8841 if ( idxRange == UINT8_MAX
8842 || idxRange >= cRanges
8843 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8844 {
8845 idxRange += 1;
8846 if (idxRange < cRanges)
8847 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8848 else
8849 continue;
8850 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8851 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8852 + (pTb->aRanges[idxRange].idxPhysPage == 0
8853 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8854 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8855 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8856 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8857 pTb->aRanges[idxRange].idxPhysPage);
8858 GCPhysPc += offRange;
8859 }
8860
8861 /* Disassemble the instruction. */
8862 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8863 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8864 uint32_t cbInstr = 1;
8865 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8866 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8867 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8868 if (RT_SUCCESS(rc))
8869 {
8870 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8871 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8872 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8873 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8874
8875 static unsigned const s_offMarker = 55;
8876 static char const s_szMarker[] = " ; <--- guest";
8877 if (cch < s_offMarker)
8878 {
8879 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8880 cch = s_offMarker;
8881 }
8882 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8883 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8884
8885 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8886 }
8887 else
8888 {
8889 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8890 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8891 cbInstr = 1;
8892 }
8893 GCPhysPc += cbInstr;
8894 offOpcodes += cbInstr;
8895 offRange += cbInstr;
8896 continue;
8897 }
8898
8899 case kIemTbDbgEntryType_ThreadedCall:
8900 pHlp->pfnPrintf(pHlp,
8901 " Call #%u to %s (%u args) - %s\n",
8902 idxThreadedCall,
8903 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8904 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8905 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8906 idxThreadedCall++;
8907 continue;
8908
8909 case kIemTbDbgEntryType_GuestRegShadowing:
8910 {
8911 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8912 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8913 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8914 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8915 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8916 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8917 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8918 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8919 else
8920 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8921 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8922 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8923 continue;
8924 }
8925
8926#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8927 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8928 {
8929 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8930 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8931 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8932 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8933 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8934 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8935 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8936 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8937 else
8938 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8939 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8940 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8941 continue;
8942 }
8943#endif
8944
8945 case kIemTbDbgEntryType_Label:
8946 {
8947 const char *pszName = "what_the_fudge";
8948 const char *pszComment = "";
8949 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8950 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8951 {
8952 case kIemNativeLabelType_Return: pszName = "Return"; break;
8953 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8954 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8955 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8956 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8957 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8958 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8959 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8960 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8961 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8962 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8963 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8964 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8965 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8966 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8967 case kIemNativeLabelType_If:
8968 pszName = "If";
8969 fNumbered = true;
8970 break;
8971 case kIemNativeLabelType_Else:
8972 pszName = "Else";
8973 fNumbered = true;
8974 pszComment = " ; regs state restored pre-if-block";
8975 break;
8976 case kIemNativeLabelType_Endif:
8977 pszName = "Endif";
8978 fNumbered = true;
8979 break;
8980 case kIemNativeLabelType_CheckIrq:
8981 pszName = "CheckIrq_CheckVM";
8982 fNumbered = true;
8983 break;
8984 case kIemNativeLabelType_TlbLookup:
8985 pszName = "TlbLookup";
8986 fNumbered = true;
8987 break;
8988 case kIemNativeLabelType_TlbMiss:
8989 pszName = "TlbMiss";
8990 fNumbered = true;
8991 break;
8992 case kIemNativeLabelType_TlbDone:
8993 pszName = "TlbDone";
8994 fNumbered = true;
8995 break;
8996 case kIemNativeLabelType_Invalid:
8997 case kIemNativeLabelType_End:
8998 break;
8999 }
9000 if (fNumbered)
9001 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9002 else
9003 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9004 continue;
9005 }
9006
9007 case kIemTbDbgEntryType_NativeOffset:
9008 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9009 Assert(offDbgNativeNext > offNative);
9010 break;
9011
9012#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9013 case kIemTbDbgEntryType_DelayedPcUpdate:
9014 pHlp->pfnPrintf(pHlp,
9015 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9016 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9017 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9018 continue;
9019#endif
9020
9021 default:
9022 AssertFailed();
9023 }
9024 iDbgEntry++;
9025 break;
9026 }
9027 }
9028
9029 /*
9030 * Disassemble the next native instruction.
9031 */
9032 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9033# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9034 uint32_t cbInstr = sizeof(paNative[0]);
9035 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9036 if (RT_SUCCESS(rc))
9037 {
9038# if defined(RT_ARCH_AMD64)
9039 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9040 {
9041 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9042 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9043 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9044 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9045 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9046 uInfo & 0x8000 ? "recompiled" : "todo");
9047 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9048 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9049 else
9050 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9051 }
9052 else
9053# endif
9054 {
9055 const char *pszAnnotation = NULL;
9056# ifdef RT_ARCH_AMD64
9057 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9058 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9059 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9060 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9061 PCDISOPPARAM pMemOp;
9062 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9063 pMemOp = &Dis.Param1;
9064 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9065 pMemOp = &Dis.Param2;
9066 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9067 pMemOp = &Dis.Param3;
9068 else
9069 pMemOp = NULL;
9070 if ( pMemOp
9071 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9072 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9073 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9074 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9075
9076#elif defined(RT_ARCH_ARM64)
9077 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9078 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9079 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9080# else
9081# error "Port me"
9082# endif
9083 if (pszAnnotation)
9084 {
9085 static unsigned const s_offAnnotation = 55;
9086 size_t const cchAnnotation = strlen(pszAnnotation);
9087 size_t cchDis = strlen(szDisBuf);
9088 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9089 {
9090 if (cchDis < s_offAnnotation)
9091 {
9092 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9093 cchDis = s_offAnnotation;
9094 }
9095 szDisBuf[cchDis++] = ' ';
9096 szDisBuf[cchDis++] = ';';
9097 szDisBuf[cchDis++] = ' ';
9098 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9099 }
9100 }
9101 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9102 }
9103 }
9104 else
9105 {
9106# if defined(RT_ARCH_AMD64)
9107 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9108 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9109# elif defined(RT_ARCH_ARM64)
9110 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9111# else
9112# error "Port me"
9113# endif
9114 cbInstr = sizeof(paNative[0]);
9115 }
9116 offNative += cbInstr / sizeof(paNative[0]);
9117
9118# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9119 cs_insn *pInstr;
9120 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9121 (uintptr_t)pNativeCur, 1, &pInstr);
9122 if (cInstrs > 0)
9123 {
9124 Assert(cInstrs == 1);
9125 const char *pszAnnotation = NULL;
9126# if defined(RT_ARCH_ARM64)
9127 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9128 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9129 {
9130                    /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9131 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9132 char *psz = strchr(pInstr->op_str, '[');
9133 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9134 {
9135 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9136 int32_t off = -1;
9137 psz += 4;
9138 if (*psz == ']')
9139 off = 0;
9140 else if (*psz == ',')
9141 {
9142 psz = RTStrStripL(psz + 1);
9143 if (*psz == '#')
9144 off = RTStrToInt32(&psz[1]);
9145 /** @todo deal with index registers and LSL as well... */
9146 }
9147 if (off >= 0)
9148 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9149 }
9150 }
9151# endif
9152
9153 size_t const cchOp = strlen(pInstr->op_str);
9154# if defined(RT_ARCH_AMD64)
9155 if (pszAnnotation)
9156 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9157 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9158 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9159 else
9160 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9161 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9162
9163# else
9164 if (pszAnnotation)
9165 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9166 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9167 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9168 else
9169 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9170 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9171# endif
9172 offNative += pInstr->size / sizeof(*pNativeCur);
9173 cs_free(pInstr, cInstrs);
9174 }
9175 else
9176 {
9177# if defined(RT_ARCH_AMD64)
9178 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9179                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9180# else
9181 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9182# endif
9183 offNative++;
9184 }
9185# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9186 }
9187 }
9188 else
9189#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9190 {
9191 /*
9192 * No debug info, just disassemble the x86 code and then the native code.
9193 *
9194 * First the guest code:
9195 */
9196 for (unsigned i = 0; i < pTb->cRanges; i++)
9197 {
9198 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9199 + (pTb->aRanges[i].idxPhysPage == 0
9200 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9201 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9202 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9203 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9204 unsigned off = pTb->aRanges[i].offOpcodes;
9205 /** @todo this ain't working when crossing pages! */
9206 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9207 while (off < cbOpcodes)
9208 {
9209 uint32_t cbInstr = 1;
9210 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9211 &pTb->pabOpcodes[off], cbOpcodes - off,
9212 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9213 if (RT_SUCCESS(rc))
9214 {
9215 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9216 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9217 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9218 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9219 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9220 GCPhysPc += cbInstr;
9221 off += cbInstr;
9222 }
9223 else
9224 {
9225 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9226 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9227 break;
9228 }
9229 }
9230 }
9231
9232 /*
9233 * Then the native code:
9234 */
9235 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9236 while (offNative < cNative)
9237 {
9238 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9239# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9240 uint32_t cbInstr = sizeof(paNative[0]);
9241 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9242 if (RT_SUCCESS(rc))
9243 {
9244# if defined(RT_ARCH_AMD64)
9245 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9246 {
9247 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9248 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9249 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9250 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9251 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9252 uInfo & 0x8000 ? "recompiled" : "todo");
9253 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9254 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9255 else
9256 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9257 }
9258 else
9259# endif
9260 {
9261# ifdef RT_ARCH_AMD64
9262 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9263 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9264 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9265 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9266# elif defined(RT_ARCH_ARM64)
9267 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9268 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9269 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9270# else
9271# error "Port me"
9272# endif
9273 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9274 }
9275 }
9276 else
9277 {
9278# if defined(RT_ARCH_AMD64)
9279 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9280 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9281# else
9282 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9283# endif
9284 cbInstr = sizeof(paNative[0]);
9285 }
9286 offNative += cbInstr / sizeof(paNative[0]);
9287
9288# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9289 cs_insn *pInstr;
9290 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9291 (uintptr_t)pNativeCur, 1, &pInstr);
9292 if (cInstrs > 0)
9293 {
9294 Assert(cInstrs == 1);
9295# if defined(RT_ARCH_AMD64)
9296 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9297 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9298# else
9299 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9300 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9301# endif
9302 offNative += pInstr->size / sizeof(*pNativeCur);
9303 cs_free(pInstr, cInstrs);
9304 }
9305 else
9306 {
9307# if defined(RT_ARCH_AMD64)
9308 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9309                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9310# else
9311 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9312# endif
9313 offNative++;
9314 }
9315# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9316 }
9317 }
9318
9319#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9320 /* Cleanup. */
9321 cs_close(&hDisasm);
9322#endif
9323}
9324
9325
9326/**
9327 * Recompiles the given threaded TB into a native one.
9328 *
9329 * In case of failure the translation block will be returned as-is.
9330 *
9331 * @returns pTb.
9332 * @param pVCpu The cross context virtual CPU structure of the calling
9333 * thread.
9334 * @param   pTb     The threaded translation block to recompile to native.
9335 */
9336DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9337{
9338 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9339
9340 /*
9341 * The first time thru, we allocate the recompiler state, the other times
9342 * we just need to reset it before using it again.
9343 */
9344 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9345 if (RT_LIKELY(pReNative))
9346 iemNativeReInit(pReNative, pTb);
9347 else
9348 {
9349 pReNative = iemNativeInit(pVCpu, pTb);
9350 AssertReturn(pReNative, pTb);
9351 }
9352
9353#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9354 /*
9355 * First do liveness analysis. This is done backwards.
9356 */
9357 {
9358 uint32_t idxCall = pTb->Thrd.cCalls;
9359 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9360 { /* likely */ }
9361 else
9362 {
9363 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9364 while (idxCall > cAlloc)
9365 cAlloc *= 2;
9366 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9367 AssertReturn(pvNew, pTb);
9368 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9369 pReNative->cLivenessEntriesAlloc = cAlloc;
9370 }
9371 AssertReturn(idxCall > 0, pTb);
9372 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9373
9374 /* The initial (final) entry. */
9375 idxCall--;
9376 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9377
9378 /* Loop backwards thru the calls and fill in the other entries. */
9379 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9380 while (idxCall > 0)
9381 {
9382 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9383 if (pfnLiveness)
9384 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9385 else
9386 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9387 pCallEntry--;
9388 idxCall--;
9389 }
9390
9391# ifdef VBOX_WITH_STATISTICS
9392    /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9393       to 'clobbered' rather than 'input'. */
9394 /** @todo */
9395# endif
9396 }
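    /* (Informal summary of the pass above: the entry for the last call is initialised as
       all-unused, and the loop then walks the threaded calls from last to first, letting
       each call's liveness function derive the entry of the preceding call; calls without
       a liveness function are treated as exception/call barriers via
       IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL.) */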
9397#endif
9398
9399 /*
9400 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9401 * for aborting if an error happens.
9402 */
9403 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9404#ifdef LOG_ENABLED
9405 uint32_t const cCallsOrg = cCallsLeft;
9406#endif
9407 uint32_t off = 0;
9408 int rc = VINF_SUCCESS;
9409 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9410 {
9411 /*
9412 * Emit prolog code (fixed).
9413 */
9414 off = iemNativeEmitProlog(pReNative, off);
9415
9416 /*
9417 * Convert the calls to native code.
9418 */
9419#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9420 int32_t iGstInstr = -1;
9421#endif
9422#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9423 uint32_t cThreadedCalls = 0;
9424 uint32_t cRecompiledCalls = 0;
9425#endif
9426#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9427 uint32_t idxCurCall = 0;
9428#endif
9429 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9430 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9431 while (cCallsLeft-- > 0)
9432 {
9433 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9434#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9435 pReNative->idxCurCall = idxCurCall;
9436#endif
9437
9438 /*
9439 * Debug info, assembly markup and statistics.
9440 */
9441#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9442 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9443 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9444#endif
9445#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9446 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9447 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9448 {
9449 if (iGstInstr < (int32_t)pTb->cInstructions)
9450 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9451 else
9452 Assert(iGstInstr == pTb->cInstructions);
9453 iGstInstr = pCallEntry->idxInstr;
9454 }
9455 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9456#endif
9457#if defined(VBOX_STRICT)
9458 off = iemNativeEmitMarker(pReNative, off,
9459 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9460#endif
9461#if defined(VBOX_STRICT)
9462 iemNativeRegAssertSanity(pReNative);
9463#endif
9464#ifdef VBOX_WITH_STATISTICS
9465 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9466#endif
9467
9468 /*
9469 * Actual work.
9470 */
9471 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9472 pfnRecom ? "(recompiled)" : "(todo)"));
9473 if (pfnRecom) /** @todo stats on this. */
9474 {
9475 off = pfnRecom(pReNative, off, pCallEntry);
9476 STAM_REL_STATS({cRecompiledCalls++;});
9477 }
9478 else
9479 {
9480 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9481 STAM_REL_STATS({cThreadedCalls++;});
9482 }
9483 Assert(off <= pReNative->cInstrBufAlloc);
9484 Assert(pReNative->cCondDepth == 0);
9485
9486#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9487 if (LogIs2Enabled())
9488 {
9489 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9490# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9491 static const char s_achState[] = "CUXI";
9492# else
9493 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9494# endif
9495
9496 char szGpr[17];
9497 for (unsigned i = 0; i < 16; i++)
9498 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9499 szGpr[16] = '\0';
9500
9501 char szSegBase[X86_SREG_COUNT + 1];
9502 char szSegLimit[X86_SREG_COUNT + 1];
9503 char szSegAttrib[X86_SREG_COUNT + 1];
9504 char szSegSel[X86_SREG_COUNT + 1];
9505 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9506 {
9507 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9508 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9509 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9510 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9511 }
9512 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9513 = szSegSel[X86_SREG_COUNT] = '\0';
9514
9515 char szEFlags[8];
9516 for (unsigned i = 0; i < 7; i++)
9517 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9518 szEFlags[7] = '\0';
9519
9520 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9521 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9522 }
9523#endif
9524
9525 /*
9526 * Advance.
9527 */
9528 pCallEntry++;
9529#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9530 idxCurCall++;
9531#endif
9532 }
9533
9534 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9535 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9536 if (!cThreadedCalls)
9537 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9538
9539 /*
9540 * Emit the epilog code.
9541 */
9542 uint32_t idxReturnLabel;
9543 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9544
9545 /*
9546 * Generate special jump labels.
9547 */
9548 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9549 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9550 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9551 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9552
9553 /*
9554         * Generate simple TB tail labels that just call a helper with a pVCpu
9555         * arg and either return or longjmp/throw a non-zero status.
9556 *
9557 * The array entries must be ordered by enmLabel value so we can index
9558 * using fTailLabels bit numbers.
9559 */
9560 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9561 static struct
9562 {
9563 IEMNATIVELABELTYPE enmLabel;
9564 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9565 } const g_aSimpleTailLabels[] =
9566 {
9567 { kIemNativeLabelType_Invalid, NULL },
9568 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9569 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9570 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9571 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9572 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9573 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9574 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9575 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9576 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9577 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9578 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9579 };
9580 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9581 AssertCompile(kIemNativeLabelType_Invalid == 0);
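        /* Worked example: RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U is the mask of
           bits 1 thru LastSimple, i.e. all the simple tail label types except bit 0 (Invalid);
           with the 12 entry table above (LastSimple = 11) that evaluates to UINT64_C(0xffe). */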
9582 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9583 if (fTailLabels)
9584 {
9585 do
9586 {
9587 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9588 fTailLabels &= ~RT_BIT_64(enmLabel);
9589 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9590
9591 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9592 Assert(idxLabel != UINT32_MAX);
9593 if (idxLabel != UINT32_MAX)
9594 {
9595 iemNativeLabelDefine(pReNative, idxLabel, off);
9596
9597 /* int pfnCallback(PVMCPUCC pVCpu) */
9598 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9599 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9600
9601 /* jump back to the return sequence. */
9602 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9603 }
9604
9605 } while (fTailLabels);
9606 }
9607 }
9608 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9609 {
9610 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9611 return pTb;
9612 }
9613 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9614 Assert(off <= pReNative->cInstrBufAlloc);
9615
9616 /*
9617     * Make sure all labels have been defined.
9618 */
9619 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9620#ifdef VBOX_STRICT
9621 uint32_t const cLabels = pReNative->cLabels;
9622 for (uint32_t i = 0; i < cLabels; i++)
9623 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9624#endif
9625
9626 /*
9627 * Allocate executable memory, copy over the code we've generated.
9628 */
9629 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9630 if (pTbAllocator->pDelayedFreeHead)
9631 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9632
9633 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9634 AssertReturn(paFinalInstrBuf, pTb);
9635 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9636
9637 /*
9638 * Apply fixups.
9639 */
9640 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9641 uint32_t const cFixups = pReNative->cFixups;
9642 for (uint32_t i = 0; i < cFixups; i++)
9643 {
9644 Assert(paFixups[i].off < off);
9645 Assert(paFixups[i].idxLabel < cLabels);
9646 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9647 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9648 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9649 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9650 switch (paFixups[i].enmType)
9651 {
9652#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9653 case kIemNativeFixupType_Rel32:
9654 Assert(paFixups[i].off + 4 <= off);
9655 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9656 continue;
9657
9658#elif defined(RT_ARCH_ARM64)
9659 case kIemNativeFixupType_RelImm26At0:
9660 {
9661 Assert(paFixups[i].off < off);
9662 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9663 Assert(offDisp >= -262144 && offDisp < 262144);
9664 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9665 continue;
9666 }
9667
9668 case kIemNativeFixupType_RelImm19At5:
9669 {
9670 Assert(paFixups[i].off < off);
9671 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9672 Assert(offDisp >= -262144 && offDisp < 262144);
9673 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9674 continue;
9675 }
9676
9677 case kIemNativeFixupType_RelImm14At5:
9678 {
9679 Assert(paFixups[i].off < off);
9680 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9681 Assert(offDisp >= -8192 && offDisp < 8192);
9682 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9683 continue;
9684 }
9685
9686#endif
9687 case kIemNativeFixupType_Invalid:
9688 case kIemNativeFixupType_End:
9689 break;
9690 }
9691 AssertFailed();
9692 }
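    /* Worked example (informal): for a kIemNativeFixupType_RelImm26At0 fixup on ARM64 with a
       branch at native offset 100, a label at offset 108 and offAddend 0, offDisp comes out as
       8; since offsets count 32-bit IEMNATIVEINSTR units here, that is the word displacement
       the B/BL imm26 field expects, so the low 26 bits of the opcode are patched with 8 while
       the 0xfc000000 opcode bits are preserved. */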
9693
9694 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9695 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9696
9697 /*
9698 * Convert the translation block.
9699 */
9700 RTMemFree(pTb->Thrd.paCalls);
9701 pTb->Native.paInstructions = paFinalInstrBuf;
9702 pTb->Native.cInstructions = off;
9703 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9704#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9705    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9706 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9707#endif
9708
9709 Assert(pTbAllocator->cThreadedTbs > 0);
9710 pTbAllocator->cThreadedTbs -= 1;
9711 pTbAllocator->cNativeTbs += 1;
9712 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9713
9714#ifdef LOG_ENABLED
9715 /*
9716 * Disassemble to the log if enabled.
9717 */
9718 if (LogIs3Enabled())
9719 {
9720 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9721 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9722# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9723 RTLogFlush(NULL);
9724# endif
9725 }
9726#endif
9727 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9728
9729 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9730 return pTb;
9731}
9732