source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103993

Last change on this file since 103993 was 103993, checked in by vboxsync, 9 months ago

VMM/IEM: Implement native emitters for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT(), IEM_MC_REF_XREG_R32_CONST(), IEM_MC_REF_XREG_R64_CONST(), IEM_MC_REF_XREG_U32_CONST(), IEM_MC_REF_XREG_U64_CONST() and IEM_MC_STORE_SSE_RESULT(), bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103993 2024-03-21 17:59:07Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using the RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
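#if 0 /* Illustrative sketch only, not part of the sources: how a request size
         maps onto sub-allocation units given the two constants above.  The
         helper name is hypothetical. */
static uint32_t iemExecMemExampleReqUnits(uint32_t cbReq)
{
    /* E.g. a 200 byte request rounds up to two 128 byte units (256 bytes). */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif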
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity, they are allocated as one contiguous
339 * chunk. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to an chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * request memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
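/* Sizing example (illustrative only, mirroring the calculations done in
 * iemExecMemAllocatorInit below): with a 64 MiB chunk and 128 byte units,
 *   cUnitsPerChunk          = 64 MiB / 128 = 524288 units,
 *   cBitmapElementsPerChunk = 524288 / 64  =   8192 uint64_t words,
 * i.e. each chunk needs a 64 KiB slice of the allocation bitmap. */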
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Advance the bitmap pointer to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits of consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
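/* Scan example (illustrative only): with cReqUnits = 2 and the first bitmap
 * word being 0x0000000000000007, bits 0..2 are already allocated, so
 * ASMBitFirstClear returns 3, the inner loop finds bit 4 clear as well, bits
 * 3..4 get set, and the sub-allocation starts (idxFirst + 3) * 128 bytes into
 * the chunk. */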
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
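        /* Worked example (illustrative only): a 100 byte request becomes 128
         * bytes with the alternative sub-allocator, or
         * RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes with RTHeapSimple,
         * assuming the 32 byte block header measured during init. */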
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try to prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
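#if 0 /* Illustrative usage sketch only (hypothetical helper, not part of the
         sources): the allocate -> emit -> ready-for-use -> free life cycle
         described in iemExecMemAllocatorAllocTailCode above. */
static void iemExecMemExampleLifeCycle(PVMCPUCC pVCpu, uint32_t cbCode)
{
    void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode);     /* RW (darwin) or RWX memory. */
    if (pv)
    {
        memset(pv, 0xcc, cbCode);                           /* ... emit native code here ... */
        iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);  /* flip to RX + icache flush on darwin */
        /* ... execute the code ... */
        iemExecMemAllocatorFree(pVCpu, pv, cbCode);
    }
}
#endif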
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
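/* Encoding examples (illustrative only):
 *   iemDwarfPutLeb128(Ptr, -8)      emits 0x78           (one byte, sign bit set),
 *   iemDwarfPutLeb128(Ptr, -100)    emits 0x9c 0x7f,
 *   iemDwarfPutUleb128(Ptr, 624485) emits 0xe5 0x8e 0x26 (the classic DWARF spec example). */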
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
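/* Example (illustrative only): with the data alignment factor of -8 used in
 * the CIE below, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) encodes
 * "RBP is saved at CFA + 2 * -8", i.e. 16 bytes below the CFA, as a
 * DW_CFA_offset|reg opcode followed by ULEB128(2). */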
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above updates: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
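 /* Worked example (illustrative only): for cbMax = 128 MiB and cbChunk = 0 the
  * code above picks cbChunk = 128 MiB / 4 = 32 MiB (already a power of two),
  * rounds cbMax up to a whole number of chunks (still 128 MiB) and arrives at
  * cMaxChunks = 4. */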
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
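
/*
 * Illustration of the chunk size defaulting above (hypothetical numbers, not
 * taken from any caller in the sources; pVCpu stands for the calling EMT's
 * cross context virtual CPU structure):
 *
 * @code
 *      // 40 MiB max, 8 MiB initial, cbChunk left at 0 so the default kicks in.
 *      int rc = iemExecMemAllocatorInit(pVCpu, 40 * _1M, 8 * _1M, 0 /*cbChunk*/);
 *      // cbMax is at least _16M and below _256M, so cbChunk starts out as cbMax / 4 = 10 MiB,
 *      // which is not a power of two and gets rounded up to 16 MiB via
 *      // RT_BIT_32(ASMBitLastSetU32(cbChunk)).  cbMax is then rounded up to a whole
 *      // number of chunks (48 MiB), giving cMaxChunks = 3, and the initial 8 MiB
 *      // request is satisfied by growing the allocator by one 16 MiB chunk.
 * @endcode
 */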
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
1643 *
1644 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
1647{
1648 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
1649 iemRaiseSimdFpExceptionJmp(pVCpu);
1650 else
1651 iemRaiseUndefinedOpcodeJmp(pVCpu);
1652#ifndef _MSC_VER
1653 return VINF_IEM_RAISED_XCPT; /* not reached */
1654#endif
1655}
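
/*
 * Quick reference for the three SIMD related raise helpers above; this is just
 * a restatement of the checks in the code, not additional behaviour:
 *
 *      iemNativeHlpExecRaiseSseRelated:       CR0.EM set or CR4.OSFXSR clear           -> #UD, otherwise #NM.
 *      iemNativeHlpExecRaiseAvxRelated:       XCR0 lacks SSE+YMM or CR4.OSXSAVE clear  -> #UD, otherwise #NM.
 *      iemNativeHlpExecRaiseSseAvxFpRelated:  CR4.OSXMMEEXCPT set                      -> #XF, otherwise #UD.
 */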
1656
1657
1658/**
1659 * Used by TB code when it wants to raise a \#NM.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1662{
1663 iemRaiseDeviceNotAvailableJmp(pVCpu);
1664#ifndef _MSC_VER
1665 return VINF_IEM_RAISED_XCPT; /* not reached */
1666#endif
1667}
1668
1669
1670/**
1671 * Used by TB code when it wants to raise a \#GP(0).
1672 */
1673IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1674{
1675 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1676#ifndef _MSC_VER
1677 return VINF_IEM_RAISED_XCPT; /* not reached */
1678#endif
1679}
1680
1681
1682/**
1683 * Used by TB code when it wants to raise a \#MF.
1684 */
1685IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1686{
1687 iemRaiseMathFaultJmp(pVCpu);
1688#ifndef _MSC_VER
1689 return VINF_IEM_RAISED_XCPT; /* not reached */
1690#endif
1691}
1692
1693
1694/**
1695 * Used by TB code when it wants to raise a \#XF.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1698{
1699 iemRaiseSimdFpExceptionJmp(pVCpu);
1700#ifndef _MSC_VER
1701 return VINF_IEM_RAISED_XCPT; /* not reached */
1702#endif
1703}
1704
1705
1706/**
1707 * Used by TB code when detecting opcode changes.
1708 * @see iemThreadeFuncWorkerObsoleteTb
1709 */
1710IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1711{
1712    /* We set fSafeToFree to false because we're being called in the context
1713       of a TB callback function, which for native TBs means we cannot release
1714       the executable memory until we've returned our way back to iemTbExec, as
1715       that return path goes via the native code generated for the TB. */
1716 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1717 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1718 return VINF_IEM_REEXEC_BREAK;
1719}
1720
1721
1722/**
1723 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1724 */
1725IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1726{
1727 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1728 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1729 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1730 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1731 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1732 return VINF_IEM_REEXEC_BREAK;
1733}
1734
1735
1736/**
1737 * Used by TB code when we missed a PC check after a branch.
1738 */
1739IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1740{
1741 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1742 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1743 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1744 pVCpu->iem.s.pbInstrBuf));
1745 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1746 return VINF_IEM_REEXEC_BREAK;
1747}
1748
1749
1750
1751/*********************************************************************************************************************************
1752* Helpers: Segmented memory fetches and stores. *
1753*********************************************************************************************************************************/
1754
1755/**
1756 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1757 */
1758IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1759{
1760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1761 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1762#else
1763 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1764#endif
1765}
1766
1767
1768/**
1769 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1770 * to 16 bits.
1771 */
1772IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1773{
1774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1775 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1776#else
1777 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1778#endif
1779}
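
/*
 * Worked example of the cast chain used by the sign extending fetch helpers
 * (illustrative only): a fetched byte of 0x80 is sign extended to int16_t and
 * then zero extended into the 64-bit return value.
 *
 * @code
 *      uint64_t const uRet = (uint64_t)(uint16_t)(int16_t)(int8_t)UINT8_C(0x80);
 *      Assert(uRet == UINT64_C(0x000000000000ff80));
 * @endcode
 *
 * The 32-bit and 64-bit _Sx_ variants below work the same way, only with wider
 * intermediate types.
 */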
1780
1781
1782/**
1783 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1784 * to 32 bits.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1789 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1790#else
1791 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1792#endif
1793}
1794
1795/**
1796 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1797 * to 64 bits.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1802 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1803#else
1804 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1824 * to 32 bits.
1825 */
1826IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1827{
1828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1829 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1830#else
1831 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1832#endif
1833}
1834
1835
1836/**
1837 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1838 * to 64 bits.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1843 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1844#else
1845 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1856 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1857#else
1858 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1865 * to 64 bits.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1870 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1871#else
1872 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1883 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1884#else
1885 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1886#endif
1887}
1888
1889
1890#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1891/**
1892 * Used by TB code to load 128-bit data w/ segmentation.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1897 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1898#else
1899 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to load 128-bit data w/ segmentation and SSE alignment checks.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1910 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1911#else
1912 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1923 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1924#else
1925 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to load 256-bit data w/ segmentation, without alignment checks.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1936 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1937#else
1938 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to load 256-bit data w/ segmentation and AVX alignment checks.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1949 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1950#else
1951 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1952#endif
1953}
1954#endif
1955
1956
1957/**
1958 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1963 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1964#else
1965 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1976 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1977#else
1978 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1989 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1990#else
1991 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2002 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2003#else
2004 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2005#endif
2006}
2007
2008
2009#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2010/**
2011 * Used by TB code to store unsigned 128-bit data w/ segmentation and SSE alignment checks.
2012 */
2013IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2014{
2015#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2016 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2017#else
2018 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2019#endif
2020}
2021
2022
2023/**
2024 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checks.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2029 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2030#else
2031 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checks.
2038 */
2039IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2040{
2041#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2042 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2043#else
2044 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2045#endif
2046}
2047
2048
2049/**
2050 * Used by TB code to store unsigned 256-bit data w/ segmentation and AVX alignment checks.
2051 */
2052IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2053{
2054#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2055 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2056#else
2057    iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2058#endif
2059}
2060#endif
2061
2062
2063
2064/**
2065 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2068{
2069#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2070 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2071#else
2072 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2073#endif
2074}
2075
2076
2077/**
2078 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2079 */
2080IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2081{
2082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2083 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2084#else
2085 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2086#endif
2087}
2088
2089
2090/**
2091 * Used by TB code to store a 32-bit selector value onto a generic stack.
2092 *
2093 * Intel CPUs don't write a whole dword, hence this special function.
2094 */
2095IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2096{
2097#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2098 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2099#else
2100 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2101#endif
2102}
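
/*
 * Illustration of the SReg special case (hypothetical values): pushing a 16-bit
 * selector with a 32-bit operand size only updates the low word of the stack
 * slot on such CPUs, so storing selector 0x0008 into a slot that currently
 * holds 0xdeadbeef yields 0xdead0008, whereas the plain U32 helper above would
 * store the full 0x00000008 dword.
 */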
2103
2104
2105/**
2106 * Used by TB code to push an unsigned 64-bit value onto a generic stack.
2107 */
2108IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2109{
2110#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2111 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2112#else
2113 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2114#endif
2115}
2116
2117
2118/**
2119 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2122{
2123#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2124 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2125#else
2126 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2127#endif
2128}
2129
2130
2131/**
2132 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2137 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2138#else
2139 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2150 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2151#else
2152 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2153#endif
2154}
2155
2156
2157
2158/*********************************************************************************************************************************
2159* Helpers: Flat memory fetches and stores. *
2160*********************************************************************************************************************************/
2161
2162/**
2163 * Used by TB code to load unsigned 8-bit data w/ flat address.
2164 * @note Zero extending the value to 64-bit to simplify assembly.
2165 */
2166IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2167{
2168#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2169 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2170#else
2171 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2172#endif
2173}
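
/*
 * Note on the flat helpers in this section (derived from the code itself): when
 * IEMNATIVE_WITH_TLB_LOOKUP_FETCH is defined they reuse the segmented
 * iemMemFetchData*SafeJmp workers, passing UINT8_MAX as the segment register
 * index to signal an already linearized (flat) address; otherwise they call the
 * dedicated iemMemFlatFetchData*Jmp workers.  The flat store, stack and mapping
 * helpers further down follow the same pattern with their respective defines.
 */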
2174
2175
2176/**
2177 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2178 * to 16 bits.
2179 * @note Zero extending the value to 64-bit to simplify assembly.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2182{
2183#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2184 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2185#else
2186 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2187#endif
2188}
2189
2190
2191/**
2192 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2193 * to 32 bits.
2194 * @note Zero extending the value to 64-bit to simplify assembly.
2195 */
2196IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2197{
2198#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2199 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2200#else
2201 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2202#endif
2203}
2204
2205
2206/**
2207 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2208 * to 64 bits.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2213 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2214#else
2215 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2216#endif
2217}
2218
2219
2220/**
2221 * Used by TB code to load unsigned 16-bit data w/ flat address.
2222 * @note Zero extending the value to 64-bit to simplify assembly.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2227 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2228#else
2229 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2236 * to 32 bits.
2237 * @note Zero extending the value to 64-bit to simplify assembly.
2238 */
2239IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2240{
2241#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2242 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2243#else
2244 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2245#endif
2246}
2247
2248
2249/**
2250 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2251 * to 64 bits.
2252 * @note Zero extending the value to 64-bit to simplify assembly.
2253 */
2254IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2255{
2256#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2257 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2258#else
2259 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2260#endif
2261}
2262
2263
2264/**
2265 * Used by TB code to load unsigned 32-bit data w/ flat address.
2266 * @note Zero extending the value to 64-bit to simplify assembly.
2267 */
2268IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2269{
2270#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2271 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2272#else
2273 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2274#endif
2275}
2276
2277
2278/**
2279 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2280 * to 64 bits.
2281 * @note Zero extending the value to 64-bit to simplify assembly.
2282 */
2283IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2284{
2285#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2286 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2287#else
2288 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2289#endif
2290}
2291
2292
2293/**
2294 * Used by TB code to load unsigned 64-bit data w/ flat address.
2295 */
2296IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2297{
2298#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2299 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2300#else
2301 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2302#endif
2303}
2304
2305
2306#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2307/**
2308 * Used by TB code to load unsigned 128-bit data w/ flat address.
2309 */
2310IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2311{
2312#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2313 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2314#else
2315 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2316#endif
2317}
2318
2319
2320/**
2321 * Used by TB code to load unsigned 128-bit data w/ flat address and SSE alignment checks.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2324{
2325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2326 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2327#else
2328 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2329#endif
2330}
2331
2332
2333/**
2334 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2337{
2338#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2339 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2340#else
2341 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2342#endif
2343}
2344
2345
2346/**
2347 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checks.
2348 */
2349IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2350{
2351#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2352 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2353#else
2354 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2355#endif
2356}
2357
2358
2359/**
2360 * Used by TB code to load unsigned 256-bit data w/ flat address and AVX alignment checks.
2361 */
2362IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2365 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2366#else
2367 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2368#endif
2369}
2370#endif
2371
2372
2373/**
2374 * Used by TB code to store unsigned 8-bit data w/ flat address.
2375 */
2376IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2379 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2380#else
2381 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to store unsigned 16-bit data w/ flat address.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2390{
2391#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2392 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2393#else
2394 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2395#endif
2396}
2397
2398
2399/**
2400 * Used by TB code to store unsigned 32-bit data w/ flat address.
2401 */
2402IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2403{
2404#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2405 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2406#else
2407 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2408#endif
2409}
2410
2411
2412/**
2413 * Used by TB code to store unsigned 64-bit data w/ flat address.
2414 */
2415IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2416{
2417#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2418 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2419#else
2420 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2421#endif
2422}
2423
2424
2425#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2426/**
2427 * Used by TB code to store unsigned 128-bit data w/ flat address and SSE alignment checks.
2428 */
2429IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2432 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2433#else
2434 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checks.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2445 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2446#else
2447 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checks.
2454 */
2455IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2458 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2459#else
2460 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to store unsigned 256-bit data w/ flat address and AVX alignment checks.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2469{
2470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2471 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2472#else
2473 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2474#endif
2475}
2476#endif
2477
2478
2479
2480/**
2481 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2482 */
2483IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2484{
2485#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2486 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2487#else
2488 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2489#endif
2490}
2491
2492
2493/**
2494 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2495 */
2496IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2497{
2498#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2499 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2500#else
2501 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2502#endif
2503}
2504
2505
2506/**
2507 * Used by TB code to store a segment selector value onto a flat stack.
2508 *
2509 * Intel CPUs don't write a whole dword, hence this special function.
2510 */
2511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2512{
2513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2514 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2515#else
2516 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2517#endif
2518}
2519
2520
2521/**
2522 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2523 */
2524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2525{
2526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2527 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2528#else
2529 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2530#endif
2531}
2532
2533
2534/**
2535 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2536 */
2537IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2538{
2539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2540 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2541#else
2542 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2543#endif
2544}
2545
2546
2547/**
2548 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2553 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2554#else
2555 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2566 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2567#else
2568 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2569#endif
2570}
2571
2572
2573
2574/*********************************************************************************************************************************
2575* Helpers: Segmented memory mapping. *
2576*********************************************************************************************************************************/
2577
2578/**
2579 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2580 * segmentation.
2581 */
2582IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2583 RTGCPTR GCPtrMem, uint8_t iSegReg))
2584{
2585#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2586 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2587#else
2588 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2589#endif
2590}
2591
2592
2593/**
2594 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2595 */
2596IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2597 RTGCPTR GCPtrMem, uint8_t iSegReg))
2598{
2599#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2600 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2601#else
2602 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2603#endif
2604}
2605
2606
2607/**
2608 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2609 */
2610IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2611 RTGCPTR GCPtrMem, uint8_t iSegReg))
2612{
2613#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2614 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2615#else
2616 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2617#endif
2618}
2619
2620
2621/**
2622 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2623 */
2624IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2625 RTGCPTR GCPtrMem, uint8_t iSegReg))
2626{
2627#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2628 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2629#else
2630 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2631#endif
2632}
2633
2634
2635/**
2636 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2637 * segmentation.
2638 */
2639IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2640 RTGCPTR GCPtrMem, uint8_t iSegReg))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2644#else
2645 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2654 RTGCPTR GCPtrMem, uint8_t iSegReg))
2655{
2656#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2657 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2658#else
2659 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2660#endif
2661}
2662
2663
2664/**
2665 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2666 */
2667IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2668 RTGCPTR GCPtrMem, uint8_t iSegReg))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2672#else
2673 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2682 RTGCPTR GCPtrMem, uint8_t iSegReg))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2686#else
2687 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2694 * segmentation.
2695 */
2696IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2697 RTGCPTR GCPtrMem, uint8_t iSegReg))
2698{
2699#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2700 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2701#else
2702 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2703#endif
2704}
2705
2706
2707/**
2708 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2709 */
2710IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2711 RTGCPTR GCPtrMem, uint8_t iSegReg))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2715#else
2716 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2725 RTGCPTR GCPtrMem, uint8_t iSegReg))
2726{
2727#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2728 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2729#else
2730 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2731#endif
2732}
2733
2734
2735/**
2736 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2737 */
2738IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2739 RTGCPTR GCPtrMem, uint8_t iSegReg))
2740{
2741#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2742 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2743#else
2744 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2745#endif
2746}
2747
2748
2749/**
2750 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2751 * segmentation.
2752 */
2753IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2754 RTGCPTR GCPtrMem, uint8_t iSegReg))
2755{
2756#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2757 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2758#else
2759 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2760#endif
2761}
2762
2763
2764/**
2765 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2766 */
2767IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2768 RTGCPTR GCPtrMem, uint8_t iSegReg))
2769{
2770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2771 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2772#else
2773 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2774#endif
2775}
2776
2777
2778/**
2779 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2780 */
2781IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2782 RTGCPTR GCPtrMem, uint8_t iSegReg))
2783{
2784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2785 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2786#else
2787 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2788#endif
2789}
2790
2791
2792/**
2793 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2794 */
2795IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2796 RTGCPTR GCPtrMem, uint8_t iSegReg))
2797{
2798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2799 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2800#else
2801 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2802#endif
2803}
2804
2805
2806/**
2807 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2808 */
2809IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2810 RTGCPTR GCPtrMem, uint8_t iSegReg))
2811{
2812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2813 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2814#else
2815 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2816#endif
2817}
2818
2819
2820/**
2821 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2822 */
2823IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2824 RTGCPTR GCPtrMem, uint8_t iSegReg))
2825{
2826#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2827 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2828#else
2829 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2830#endif
2831}
2832
2833
2834/**
2835 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2836 * segmentation.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2839 RTGCPTR GCPtrMem, uint8_t iSegReg))
2840{
2841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2842 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2843#else
2844 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2845#endif
2846}
2847
2848
2849/**
2850 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2851 */
2852IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2853 RTGCPTR GCPtrMem, uint8_t iSegReg))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2857#else
2858 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2867 RTGCPTR GCPtrMem, uint8_t iSegReg))
2868{
2869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2870 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2871#else
2872 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2873#endif
2874}
2875
2876
2877/**
2878 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2879 */
2880IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2881 RTGCPTR GCPtrMem, uint8_t iSegReg))
2882{
2883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2884 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2885#else
2886 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2887#endif
2888}
2889
2890
2891/*********************************************************************************************************************************
2892* Helpers: Flat memory mapping. *
2893*********************************************************************************************************************************/
2894
2895/**
2896 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2897 * address.
2898 */
2899IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2900{
2901#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2902 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2903#else
2904 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2905#endif
2906}
2907
2908
2909/**
2910 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2911 */
2912IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2913{
2914#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2915 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2916#else
2917 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2918#endif
2919}
2920
2921
2922/**
2923 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2924 */
2925IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2926{
2927#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2928 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2929#else
2930 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2931#endif
2932}
2933
2934
2935/**
2936 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2937 */
2938IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2939{
2940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2941 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2942#else
2943 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2944#endif
2945}
2946
2947
2948/**
2949 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2950 * address.
2951 */
2952IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2953{
2954#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2955 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2956#else
2957 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2958#endif
2959}
2960
2961
2962/**
2963 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2964 */
2965IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2966{
2967#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2968 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2969#else
2970 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2971#endif
2972}
2973
2974
2975/**
2976 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2977 */
2978IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2979{
2980#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2981 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2982#else
2983 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2984#endif
2985}
2986
2987
2988/**
2989 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2990 */
2991IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2992{
2993#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2994 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2995#else
2996 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2997#endif
2998}
2999
3000
3001/**
3002 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
3003 * address.
3004 */
3005IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3006{
3007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3008 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3009#else
3010 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3011#endif
3012}
3013
3014
3015/**
3016 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
3017 */
3018IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3019{
3020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3021 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3022#else
3023 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3024#endif
3025}
3026
3027
3028/**
3029 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
3030 */
3031IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3032{
3033#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3034 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3035#else
3036 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3037#endif
3038}
3039
3040
3041/**
3042 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
3043 */
3044IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3045{
3046#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3047 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3048#else
3049 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3050#endif
3051}
3052
3053
3054/**
3055 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
3056 * address.
3057 */
3058IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3059{
3060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3061 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3062#else
3063 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3064#endif
3065}
3066
3067
3068/**
3069 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3070 */
3071IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3072{
3073#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3074 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3075#else
3076 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3077#endif
3078}
3079
3080
3081/**
3082 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3083 */
3084IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3085{
3086#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3087 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3088#else
3089 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3090#endif
3091}
3092
3093
3094/**
3095 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3096 */
3097IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3098{
3099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3100 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3101#else
3102 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3103#endif
3104}
3105
3106
3107/**
3108 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3109 */
3110IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3111{
3112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3113 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3114#else
3115 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3116#endif
3117}
3118
3119
3120/**
3121 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3122 */
3123IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3124{
3125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3126 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3127#else
3128 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3129#endif
3130}
3131
3132
3133/**
3134 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3135 * address.
3136 */
3137IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3138{
3139#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3140 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3141#else
3142 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3143#endif
3144}
3145
3146
3147/**
3148 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3149 */
3150IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3151{
3152#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3153 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3154#else
3155 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3156#endif
3157}
3158
3159
3160/**
3161 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3162 */
3163IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3164{
3165#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3166 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3167#else
3168 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3169#endif
3170}
3171
3172
3173/**
3174 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3175 */
3176IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3177{
3178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3179 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3180#else
3181 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3182#endif
3183}
3184
3185
3186/*********************************************************************************************************************************
3187* Helpers: Commit, rollback & unmap *
3188*********************************************************************************************************************************/
3189
3190/**
3191 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3192 */
3193IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3194{
3195 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3196}
3197
3198
3199/**
3200 * Used by TB code to commit and unmap a read-write memory mapping.
3201 */
3202IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3203{
3204 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3205}
3206
3207
3208/**
3209 * Used by TB code to commit and unmap a write-only memory mapping.
3210 */
3211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3212{
3213 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3214}
3215
3216
3217/**
3218 * Used by TB code to commit and unmap a read-only memory mapping.
3219 */
3220IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3221{
3222 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3223}
3224
3225
3226/**
3227 * Reinitializes the native recompiler state.
3228 *
3229 * Called before starting a new recompile job.
3230 */
3231static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3232{
3233 pReNative->cLabels = 0;
3234 pReNative->bmLabelTypes = 0;
3235 pReNative->cFixups = 0;
3236#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3237 pReNative->pDbgInfo->cEntries = 0;
3238#endif
3239 pReNative->pTbOrg = pTb;
3240 pReNative->cCondDepth = 0;
3241 pReNative->uCondSeqNo = 0;
3242 pReNative->uCheckIrqSeqNo = 0;
3243 pReNative->uTlbSeqNo = 0;
3244
3245#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3246 pReNative->Core.offPc = 0;
3247 pReNative->Core.cInstrPcUpdateSkipped = 0;
3248#endif
3249#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3250 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3251#endif
3252 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3253#if IEMNATIVE_HST_GREG_COUNT < 32
3254 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3255#endif
3256 ;
3257 pReNative->Core.bmHstRegsWithGstShadow = 0;
3258 pReNative->Core.bmGstRegShadows = 0;
3259 pReNative->Core.bmVars = 0;
3260 pReNative->Core.bmStack = 0;
3261 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3262 pReNative->Core.u64ArgVars = UINT64_MAX;
3263
3264 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
3265 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3266 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3267 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3268 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3269 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3270 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3271 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3272 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3273 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3274 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3275 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3276 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3277 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3278 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3279 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3280 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3281 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
3282
3283 /* Full host register reinit: */
3284 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3285 {
3286 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3287 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3288 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3289 }
3290
3291 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3292 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3293#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3294 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3295#endif
3296#ifdef IEMNATIVE_REG_FIXED_TMP0
3297 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3298#endif
3299#ifdef IEMNATIVE_REG_FIXED_TMP1
3300 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3301#endif
3302#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3303 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3304#endif
3305 );
3306 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3307 {
3308 fRegs &= ~RT_BIT_32(idxReg);
3309 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3310 }
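    /* Note: the loop above marks the remaining fixed registers (those without a dedicated
       role) as reserved; the assignments below then set the roles of the known fixed
       registers (pVCpu, the optional pCtx, the temporaries and the debug PC shadow). */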
3311
3312 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3313#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3314 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3315#endif
3316#ifdef IEMNATIVE_REG_FIXED_TMP0
3317 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3318#endif
3319#ifdef IEMNATIVE_REG_FIXED_TMP1
3320 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3321#endif
3322#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3323 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3324#endif
3325
3326#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3327# ifdef RT_ARCH_ARM64
3328 /*
3329     * ARM64 has only 32 128-bit registers. In order to support emulating 256-bit registers we
3330     * statically pair two real registers into one virtual register for now, leaving us with only 16
3331     * 256-bit registers. We always pair v0 with v1, v2 with v3, etc., so we mark the higher register
3332     * of each pair as fixed here during init, and the register allocator assumes it is always free
3332     * when the lower one is picked.
3333 */
3334 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3335# else
3336 uint32_t const fFixedAdditional = 0;
3337# endif
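    /* Note: on ARM64 the constant 0xaaaaaaaa sets bits 1, 3, 5, ..., 31, i.e. the odd-numbered
       (higher) SIMD register of each v0/v1, v2/v3, ... pair, so only the even-numbered (lower)
       halves remain available to the allocator. */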
3338
3339 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3340 | fFixedAdditional
3341# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3342 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3343# endif
3344 ;
3345 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3346 pReNative->Core.bmGstSimdRegShadows = 0;
3347 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3348 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3349
3350 /* Full host register reinit: */
3351 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3352 {
3353 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3354 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3355 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3356 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3357 }
3358
3359 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3360 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3361 {
3362 fRegs &= ~RT_BIT_32(idxReg);
3363 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3364 }
3365
3366#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3367 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3368#endif
3369
3370#endif
3371
3372 return pReNative;
3373}
3374
3375
3376/**
3377 * Allocates and initializes the native recompiler state.
3378 *
3379 * This is called the first time an EMT wants to recompile something.
3380 *
3381 * @returns Pointer to the new recompiler state.
3382 * @param pVCpu The cross context virtual CPU structure of the calling
3383 * thread.
3384 * @param pTb The TB that's about to be recompiled.
3385 * @thread EMT(pVCpu)
3386 */
3387static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3388{
3389 VMCPU_ASSERT_EMT(pVCpu);
3390
3391 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3392 AssertReturn(pReNative, NULL);
3393
3394 /*
3395 * Try allocate all the buffers and stuff we need.
3396 */
3397 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3398 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3399 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3400#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3401 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3402#endif
3403 if (RT_LIKELY( pReNative->pInstrBuf
3404 && pReNative->paLabels
3405 && pReNative->paFixups)
3406#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3407 && pReNative->pDbgInfo
3408#endif
3409 )
3410 {
3411 /*
3412 * Set the buffer & array sizes on success.
3413 */
3414 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3415 pReNative->cLabelsAlloc = _8K;
3416 pReNative->cFixupsAlloc = _16K;
3417#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3418 pReNative->cDbgInfoAlloc = _16K;
3419#endif
3420
3421 /* Other constant stuff: */
3422 pReNative->pVCpu = pVCpu;
3423
3424 /*
3425 * Done, just need to save it and reinit it.
3426 */
3427 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3428 return iemNativeReInit(pReNative, pTb);
3429 }
3430
3431 /*
3432 * Failed. Cleanup and return.
3433 */
3434 AssertFailed();
3435 RTMemFree(pReNative->pInstrBuf);
3436 RTMemFree(pReNative->paLabels);
3437 RTMemFree(pReNative->paFixups);
3438#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3439 RTMemFree(pReNative->pDbgInfo);
3440#endif
3441 RTMemFree(pReNative);
3442 return NULL;
3443}
3444
3445
3446/**
3447 * Creates a label
3448 *
3449 * If the label does not yet have a defined position,
3450 * call iemNativeLabelDefine() later to set it.
3451 *
3452 * @returns Label ID. Throws VBox status code on failure, so no need to check
3453 * the return value.
3454 * @param pReNative The native recompile state.
3455 * @param enmType The label type.
3456 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3457 * label is not yet defined (default).
3458 * @param uData Data associated with the label. Only applicable to
3459 * certain types of labels. Default is zero.
3460 */
3461DECL_HIDDEN_THROW(uint32_t)
3462iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3463 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3464{
3465 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3466
3467 /*
3468 * Locate existing label definition.
3469 *
3470 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3471 * and uData is zero.
3472 */
3473 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3474 uint32_t const cLabels = pReNative->cLabels;
3475 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3476#ifndef VBOX_STRICT
3477 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3478 && offWhere == UINT32_MAX
3479 && uData == 0
3480#endif
3481 )
3482 {
3483#ifndef VBOX_STRICT
3484 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3485 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3486 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3487 if (idxLabel < pReNative->cLabels)
3488 return idxLabel;
3489#else
3490 for (uint32_t i = 0; i < cLabels; i++)
3491 if ( paLabels[i].enmType == enmType
3492 && paLabels[i].uData == uData)
3493 {
3494 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3495 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3496 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3497 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3498 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3499 return i;
3500 }
3501 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3502 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3503#endif
3504 }
3505
3506 /*
3507 * Make sure we've got room for another label.
3508 */
3509 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3510 { /* likely */ }
3511 else
3512 {
3513 uint32_t cNew = pReNative->cLabelsAlloc;
3514 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3515 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3516 cNew *= 2;
3517 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* The IEMNATIVEFIXUP::idxLabel type restricts this */
3518 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3519 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3520 pReNative->paLabels = paLabels;
3521 pReNative->cLabelsAlloc = cNew;
3522 }
3523
3524 /*
3525 * Define a new label.
3526 */
3527 paLabels[cLabels].off = offWhere;
3528 paLabels[cLabels].enmType = enmType;
3529 paLabels[cLabels].uData = uData;
3530 pReNative->cLabels = cLabels + 1;
3531
3532 Assert((unsigned)enmType < 64);
3533 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3534
3535 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3536 {
3537 Assert(uData == 0);
3538 pReNative->aidxUniqueLabels[enmType] = cLabels;
3539 }
3540
3541 if (offWhere != UINT32_MAX)
3542 {
3543#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3544 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3545 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3546#endif
3547 }
3548 return cLabels;
3549}
3550
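/*
 * Usage sketch (illustrative only, with placeholder label/fixup types and offsets):
 * a label may be created before its position is known, referenced by a fixup at the
 * branch site, and defined once the target offset has been emitted:
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *     iemNativeAddFixup(pReNative, offBranchInstr, idxLabel, enmSomeFixupType);
 *     ...
 *     iemNativeLabelDefine(pReNative, idxLabel, offTarget);
 */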
3551
3552/**
3553 * Defines the location of an existing label.
3554 *
3555 * @param pReNative The native recompile state.
3556 * @param idxLabel The label to define.
3557 * @param offWhere The position.
3558 */
3559DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3560{
3561 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3562 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3563 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3564 pLabel->off = offWhere;
3565#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3566 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3567 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3568#endif
3569}
3570
3571
3572/**
3573 * Looks up a label.
3574 *
3575 * @returns Label ID if found, UINT32_MAX if not.
3576 */
3577static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3578 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3579{
3580 Assert((unsigned)enmType < 64);
3581 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3582 {
3583 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3584 return pReNative->aidxUniqueLabels[enmType];
3585
3586 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3587 uint32_t const cLabels = pReNative->cLabels;
3588 for (uint32_t i = 0; i < cLabels; i++)
3589 if ( paLabels[i].enmType == enmType
3590 && paLabels[i].uData == uData
3591 && ( paLabels[i].off == offWhere
3592 || offWhere == UINT32_MAX
3593 || paLabels[i].off == UINT32_MAX))
3594 return i;
3595 }
3596 return UINT32_MAX;
3597}
3598
3599
3600/**
3601 * Adds a fixup.
3602 *
3603 * @throws VBox status code (int) on failure.
3604 * @param pReNative The native recompile state.
3605 * @param offWhere The instruction offset of the fixup location.
3606 * @param idxLabel The target label ID for the fixup.
3607 * @param enmType The fixup type.
3608 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3609 */
3610DECL_HIDDEN_THROW(void)
3611iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3612 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3613{
3614 Assert(idxLabel <= UINT16_MAX);
3615 Assert((unsigned)enmType <= UINT8_MAX);
3616#ifdef RT_ARCH_ARM64
3617 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3618 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3619 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3620#endif
3621
3622 /*
3623 * Make sure we've room.
3624 */
3625 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3626 uint32_t const cFixups = pReNative->cFixups;
3627 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3628 { /* likely */ }
3629 else
3630 {
3631 uint32_t cNew = pReNative->cFixupsAlloc;
3632 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3633 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3634 cNew *= 2;
3635 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3636 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3637 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3638 pReNative->paFixups = paFixups;
3639 pReNative->cFixupsAlloc = cNew;
3640 }
3641
3642 /*
3643 * Add the fixup.
3644 */
3645 paFixups[cFixups].off = offWhere;
3646 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3647 paFixups[cFixups].enmType = enmType;
3648 paFixups[cFixups].offAddend = offAddend;
3649 pReNative->cFixups = cFixups + 1;
3650}
3651
3652
3653/**
3654 * Slow code path for iemNativeInstrBufEnsure.
3655 */
3656DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3657{
3658 /* Double the buffer size till we meet the request. */
3659 uint32_t cNew = pReNative->cInstrBufAlloc;
3660 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3661 do
3662 cNew *= 2;
3663 while (cNew < off + cInstrReq);
3664
3665 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3666#ifdef RT_ARCH_ARM64
3667 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3668#else
3669 uint32_t const cbMaxInstrBuf = _2M;
3670#endif
3671 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3672
3673 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3674 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3675
3676#ifdef VBOX_STRICT
3677 pReNative->offInstrBufChecked = off + cInstrReq;
3678#endif
3679 pReNative->cInstrBufAlloc = cNew;
3680 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3681}
3682
3683#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3684
3685/**
3686 * Grows the static debug info array used during recompilation.
3687 *
3688 * @returns Pointer to the new debug info block; throws VBox status code on
3689 * failure, so no need to check the return value.
3690 */
3691DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3692{
3693 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3694 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3695 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3696 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3697 pReNative->pDbgInfo = pDbgInfo;
3698 pReNative->cDbgInfoAlloc = cNew;
3699 return pDbgInfo;
3700}
3701
3702
3703/**
3704 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3705 */
3706DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3707{
3708 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3709 { /* likely */ }
3710 else
3711 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3712 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3713}
3714
3715
3716/**
3717 * Debug Info: Adds a native offset record, if necessary.
3718 */
3719DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3720{
3721 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3722
3723 /*
3724 * Search backwards to see if we've got a similar record already.
3725 */
3726 uint32_t idx = pDbgInfo->cEntries;
3727 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3728 while (idx-- > idxStop)
3729 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3730 {
3731 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3732 return;
3733 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3734 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3735 break;
3736 }
3737
3738 /*
3739 * Add it.
3740 */
3741 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3742 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3743 pEntry->NativeOffset.offNative = off;
3744}
3745
3746
3747/**
3748 * Debug Info: Record info about a label.
3749 */
3750static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3751{
3752 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3753 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3754 pEntry->Label.uUnused = 0;
3755 pEntry->Label.enmLabel = (uint8_t)enmType;
3756 pEntry->Label.uData = uData;
3757}
3758
3759
3760/**
3761 * Debug Info: Record info about a threaded call.
3762 */
3763static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3764{
3765 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3766 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3767 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3768 pEntry->ThreadedCall.uUnused = 0;
3769 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3770}
3771
3772
3773/**
3774 * Debug Info: Record info about a new guest instruction.
3775 */
3776static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3777{
3778 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3779 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3780 pEntry->GuestInstruction.uUnused = 0;
3781 pEntry->GuestInstruction.fExec = fExec;
3782}
3783
3784
3785/**
3786 * Debug Info: Record info about guest register shadowing.
3787 */
3788DECL_HIDDEN_THROW(void)
3789iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3790 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3791{
3792 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3793 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3794 pEntry->GuestRegShadowing.uUnused = 0;
3795 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3796 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3797 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3798}
3799
3800
3801# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3802/**
3803 * Debug Info: Record info about guest SIMD register shadowing.
3804 */
3805DECL_HIDDEN_THROW(void)
3806iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3807 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3808{
3809 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3810 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3811 pEntry->GuestSimdRegShadowing.uUnused = 0;
3812 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3813 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3814 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3815}
3816# endif
3817
3818
3819# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3820/**
3821 * Debug Info: Record info about delayed RIP updates.
3822 */
3823DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3824{
3825 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3826 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3827 pEntry->DelayedPcUpdate.offPc = offPc;
3828 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3829}
3830# endif
3831
3832#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3833
3834
3835/*********************************************************************************************************************************
3836* Register Allocator *
3837*********************************************************************************************************************************/
3838
3839/**
3840 * Register parameter indexes (indexed by argument number).
3841 */
3842DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3843{
3844 IEMNATIVE_CALL_ARG0_GREG,
3845 IEMNATIVE_CALL_ARG1_GREG,
3846 IEMNATIVE_CALL_ARG2_GREG,
3847 IEMNATIVE_CALL_ARG3_GREG,
3848#if defined(IEMNATIVE_CALL_ARG4_GREG)
3849 IEMNATIVE_CALL_ARG4_GREG,
3850# if defined(IEMNATIVE_CALL_ARG5_GREG)
3851 IEMNATIVE_CALL_ARG5_GREG,
3852# if defined(IEMNATIVE_CALL_ARG6_GREG)
3853 IEMNATIVE_CALL_ARG6_GREG,
3854# if defined(IEMNATIVE_CALL_ARG7_GREG)
3855 IEMNATIVE_CALL_ARG7_GREG,
3856# endif
3857# endif
3858# endif
3859#endif
3860};
3861AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3862
3863/**
3864 * Call register masks indexed by argument count.
3865 */
3866DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3867{
3868 0,
3869 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3870 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3871 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3872 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3873 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3874#if defined(IEMNATIVE_CALL_ARG4_GREG)
3875 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3876 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3877# if defined(IEMNATIVE_CALL_ARG5_GREG)
3878 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3879 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3880# if defined(IEMNATIVE_CALL_ARG6_GREG)
3881 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3882 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3883 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3884# if defined(IEMNATIVE_CALL_ARG7_GREG)
3885 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3886 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3887 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3888# endif
3889# endif
3890# endif
3891#endif
3892};
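/* Example: g_afIemNativeCallRegs[2] is the mask covering IEMNATIVE_CALL_ARG0_GREG and
   IEMNATIVE_CALL_ARG1_GREG, i.e. exactly the argument registers a two-argument helper
   call uses; entry zero is the empty mask. */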
3893
3894#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3895/**
3896 * BP offset of the stack argument slots.
3897 *
3898 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3899 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3900 */
3901DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3902{
3903 IEMNATIVE_FP_OFF_STACK_ARG0,
3904# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3905 IEMNATIVE_FP_OFF_STACK_ARG1,
3906# endif
3907# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3908 IEMNATIVE_FP_OFF_STACK_ARG2,
3909# endif
3910# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3911 IEMNATIVE_FP_OFF_STACK_ARG3,
3912# endif
3913};
3914AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3915#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3916
3917/**
3918 * Info about shadowed guest register values.
3919 * @see IEMNATIVEGSTREG
3920 */
3921DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3922{
3923#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3924 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3925 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3926 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3927 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3928 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3929 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3930 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3931 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3932 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3933 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3934 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3935 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3936 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3937 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3938 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3939 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3940 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3941 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3942 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3943 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3944 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3945 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3946 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3947 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3948 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3949 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3950 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3951 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3952 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3953 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3954 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3955 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3956 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3957 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3958 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3959 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3960 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3961 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3962 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3963 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3964 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3965 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3966 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3967 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3968 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3969 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3970 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3971 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3972#undef CPUMCTX_OFF_AND_SIZE
3973};
3974AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
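/* Example: g_aGstShadowInfo[kIemNativeGstReg_Pc] gives the offset and size of the guest
   rip field within VMCPU (via cpum.GstCtx), together with the name "rip" used when
   logging shadow register activity. */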
3975
3976
3977/** Host CPU general purpose register names. */
3978DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3979{
3980#ifdef RT_ARCH_AMD64
3981 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3982#elif RT_ARCH_ARM64
3983 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3984 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3985#else
3986# error "port me"
3987#endif
3988};
3989
3990
3991#if 0 /* unused */
3992/**
3993 * Tries to locate a suitable register in the given register mask.
3994 *
3995 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3996 * failed.
3997 *
3998 * @returns Host register number on success, returns UINT8_MAX on failure.
3999 */
4000static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
4001{
4002 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4003 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4004 if (fRegs)
4005 {
4006 /** @todo pick better here: */
4007 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
4008
4009 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4010 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4011 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4012 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4013
4014 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4015 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4016 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4017 return idxReg;
4018 }
4019 return UINT8_MAX;
4020}
4021#endif /* unused */
4022
4023
4024/**
4025 * Locate a register, possibly freeing one up.
4026 *
4027 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4028 * failed.
4029 *
4030 * @returns Host register number on success. Returns UINT8_MAX if no register
4031 * could be found; the caller is supposed to deal with this and raise an
4032 * allocation-type specific status code (if desired).
4033 *
4034 * @throws VBox status code if we run into trouble spilling a variable or
4035 * recording debug info. Does NOT throw anything if we're out of
4036 * registers, though.
4037 */
4038static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4039 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4040{
4041 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4042 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4043 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4044
4045 /*
4046 * Try a freed register that's shadowing a guest register.
4047 */
4048 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4049 if (fRegs)
4050 {
4051 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4052
4053#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4054 /*
4055 * When we have liveness information, we use it to kick out all shadowed
4056 * guest registers that will not be needed any more in this TB. If we're
4057 * lucky, this may prevent us from ending up here again.
4058 *
4059 * Note! We must consider the previous entry here so we don't free
4060 * anything that the current threaded function requires (current
4061 * entry is produced by the next threaded function).
4062 */
4063 uint32_t const idxCurCall = pReNative->idxCurCall;
4064 if (idxCurCall > 0)
4065 {
4066 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4067
4068# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4069 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4070 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4071 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or the XCPT_OR_CALL state */
4072# else
4073 /* Construct a mask of the registers not in the read or write state.
4074 Note! We could skip writes, if they aren't from us, as this is just
4075 a hack to prevent trashing registers that have just been written
4076 or will be written when we retire the current instruction. */
4077 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4078 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4079 & IEMLIVENESSBIT_MASK;
4080# endif
4081 /* Merge EFLAGS. */
4082 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4083 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4084 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4085 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4086 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
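            /* Net effect: each EFLAGS part (the "other" bits, CF, PF, AF, ZF, SF and OF) has
               its own liveness bit starting at kIemNativeGstReg_EFlags; the shift-and-AND
               steps above fold them together so the single kIemNativeGstReg_EFlags bit stays
               set only if every part is freeable. */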
4087
4088 /* If it matches any shadowed registers. */
4089 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4090 {
4091 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4092 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4093 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4094
4095 /* See if we've got any unshadowed registers we can return now. */
4096 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4097 if (fUnshadowedRegs)
4098 {
4099 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4100 return (fPreferVolatile
4101 ? ASMBitFirstSetU32(fUnshadowedRegs)
4102 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4103 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4104 - 1;
4105 }
4106 }
4107 }
4108#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4109
4110 unsigned const idxReg = (fPreferVolatile
4111 ? ASMBitFirstSetU32(fRegs)
4112 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4113 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4114 - 1;
4115
4116 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4117 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4118 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4119 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4120
4121 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4122 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4123 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4124 return idxReg;
4125 }
4126
4127 /*
4128 * Try free up a variable that's in a register.
4129 *
4130 * We do two rounds here: first we evacuate variables that don't need to be
4131 * saved on the stack, then in the second round we move things to the stack.
4132 */
4133 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4134 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4135 {
4136 uint32_t fVars = pReNative->Core.bmVars;
4137 while (fVars)
4138 {
4139 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4140 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4141#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4142 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
4143 continue;
4144#endif
4145
4146 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4147 && (RT_BIT_32(idxReg) & fRegMask)
4148 && ( iLoop == 0
4149 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4150 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4151 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4152 {
4153 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4154 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4155 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4156 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4157 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4158 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4159
4160 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4161 {
4162 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4163 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4164 }
4165
4166 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4167 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4168
4169 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4170 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4171 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4172 return idxReg;
4173 }
4174 fVars &= ~RT_BIT_32(idxVar);
4175 }
4176 }
4177
4178 return UINT8_MAX;
4179}
4180
4181
4182/**
4183 * Reassigns a variable to a different register specified by the caller.
4184 *
4185 * @returns The new code buffer position.
4186 * @param pReNative The native recompile state.
4187 * @param off The current code buffer position.
4188 * @param idxVar The variable index.
4189 * @param idxRegOld The old host register number.
4190 * @param idxRegNew The new host register number.
4191 * @param pszCaller The caller for logging.
4192 */
4193static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4194 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4195{
4196 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4197 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4198#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4199 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4200#endif
4201 RT_NOREF(pszCaller);
4202
4203 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4204
4205 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4206 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4207 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4208 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4209
4210 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4211 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4212 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4213 if (fGstRegShadows)
4214 {
4215 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4216 | RT_BIT_32(idxRegNew);
4217 while (fGstRegShadows)
4218 {
4219 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4220 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4221
4222 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4223 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4224 }
4225 }
4226
4227 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4228 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4229 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4230 return off;
4231}
4232
4233
4234/**
4235 * Moves a variable to a different register or spills it onto the stack.
4236 *
4237 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4238 * kinds can easily be recreated if needed later.
4239 *
4240 * @returns The new code buffer position.
4241 * @param pReNative The native recompile state.
4242 * @param off The current code buffer position.
4243 * @param idxVar The variable index.
4244 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4245 * call-volatile registers.
4246 */
4247DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4248 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4249{
4250 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4251 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4252 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4253 Assert(!pVar->fRegAcquired);
4254
4255 uint8_t const idxRegOld = pVar->idxReg;
4256 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4257 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4258 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4259 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4260 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4261 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4262 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4263 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4264
4265
4266 /** @todo Add statistics on this.*/
4267 /** @todo Implement basic variable liveness analysis (python) so variables
4268 * can be freed immediately once no longer used. Otherwise we risk trashing
4269 * registers and stack space on dead variables.
4270 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4271
4272 /*
4273 * First try move it to a different register, as that's cheaper.
4274 */
4275 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4276 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4277 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4278 if (fRegs)
4279 {
4280 /* Avoid using shadow registers, if possible. */
4281 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4282 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4283 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4284 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4285 }
4286
4287 /*
4288 * Otherwise we must spill the register onto the stack.
4289 */
4290 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4291 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4292 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4293 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4294
4295 pVar->idxReg = UINT8_MAX;
4296 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4297 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4298 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4299 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4300 return off;
4301}
4302
4303
4304/**
4305 * Allocates a temporary host general purpose register.
4306 *
4307 * This may emit code to save register content onto the stack in order to free
4308 * up a register.
4309 *
4310 * @returns The host register number; throws VBox status code on failure,
4311 * so no need to check the return value.
4312 * @param pReNative The native recompile state.
4313 * @param poff Pointer to the variable with the code buffer position.
4314 * This will be update if we need to move a variable from
4315 * This will be updated if we need to move a variable from
4316 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4317 * registers (@c true, default) or the other way around
4318 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4319 */
4320DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4321{
4322 /*
4323 * Try find a completely unused register, preferably a call-volatile one.
4324 */
4325 uint8_t idxReg;
4326 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4327 & ~pReNative->Core.bmHstRegsWithGstShadow
4328 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4329 if (fRegs)
4330 {
4331 if (fPreferVolatile)
4332 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4333 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4334 else
4335 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4336 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4337 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4338 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4339 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4340 }
4341 else
4342 {
4343 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4344 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4345 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4346 }
4347 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4348}
4349
4350
4351/**
4352 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
4353 * registers.
4354 *
4355 * @returns The host register number; throws VBox status code on failure,
4356 * so no need to check the return value.
4357 * @param pReNative The native recompile state.
4358 * @param poff Pointer to the variable with the code buffer position.
4359 * This will be updated if we need to move a variable from
4360 * register to stack in order to satisfy the request.
4361 * @param fRegMask Mask of acceptable registers.
4362 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4363 * registers (@c true, default) or the other way around
4364 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4365 */
4366DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4367 bool fPreferVolatile /*= true*/)
4368{
4369 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4370 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4371
4372 /*
4373 * Try find a completely unused register, preferably a call-volatile one.
4374 */
4375 uint8_t idxReg;
4376 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4377 & ~pReNative->Core.bmHstRegsWithGstShadow
4378 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4379 & fRegMask;
4380 if (fRegs)
4381 {
4382 if (fPreferVolatile)
4383 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4384 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4385 else
4386 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4387 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4388 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4389 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4390 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4391 }
4392 else
4393 {
4394 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4395 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4396 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4397 }
4398 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4399}
4400
4401
4402/**
4403 * Allocates a temporary register for loading an immediate value into.
4404 *
4405 * This will emit code to load the immediate, unless there happens to be an
4406 * unused register with the value already loaded.
4407 *
4408 * The caller must not modify the returned register; it must be considered
4409 * read-only. Free using iemNativeRegFreeTmpImm.
4410 *
4411 * @returns The host register number; throws VBox status code on failure, so no
4412 * need to check the return value.
4413 * @param pReNative The native recompile state.
4414 * @param poff Pointer to the variable with the code buffer position.
4415 * @param uImm The immediate value that the register must hold upon
4416 * return.
4417 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4418 * registers (@c true, default) or the other way around
4419 * (@c false).
4420 *
4421 * @note Reusing immediate values has not been implemented yet.
4422 */
4423DECL_HIDDEN_THROW(uint8_t)
4424iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4425{
4426 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4427 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4428 return idxReg;
4429}
4430
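/*
 * Usage sketch (illustrative only; the free helper's exact signature is assumed to
 * mirror the allocation side):
 *
 *     uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1234));
 *     ... emit code that only reads idxRegImm ...
 *     iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */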
4431
4432/**
4433 * Allocates a temporary host general purpose register for keeping a guest
4434 * register value.
4435 *
4436 * Since we may already have a register holding the guest register value,
4437 * code will be emitted to do the loading if that's not the case. Code may also
4438 * be emitted if we have to free up a register to satisfy the request.
4439 *
4440 * @returns The host register number; throws VBox status code on failure, so no
4441 * need to check the return value.
4442 * @param pReNative The native recompile state.
4443 * @param poff Pointer to the variable with the code buffer
4444 * position. This will be updated if we need to move a
4445 * variable from register to stack in order to satisfy
4446 * the request.
4447 * @param enmGstReg The guest register that is to be updated.
4448 * @param enmIntendedUse How the caller will be using the host register.
4449 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4450 * register is okay (default). The ASSUMPTION here is
4451 * that the caller has already flushed all volatile
4452 * registers, so this is only applied if we allocate a
4453 * new register.
4454 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4455 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4456 */
4457DECL_HIDDEN_THROW(uint8_t)
4458iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4459 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4460 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4461{
4462 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4463#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4464 AssertMsg( fSkipLivenessAssert
4465 || pReNative->idxCurCall == 0
4466 || enmGstReg == kIemNativeGstReg_Pc
4467 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4468 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4469 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4470 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4471 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4472 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4473#endif
4474 RT_NOREF(fSkipLivenessAssert);
4475#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4476 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4477#endif
4478 uint32_t const fRegMask = !fNoVolatileRegs
4479 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4480 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4481
4482 /*
4483 * First check if the guest register value is already in a host register.
4484 */
4485 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4486 {
4487 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4488 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4489 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4490 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4491
4492 /* It's not supposed to be allocated... */
4493 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4494 {
4495 /*
4496 * If the register will trash the guest shadow copy, try to find a
4497 * completely unused register we can use instead. If that fails,
4498 * we need to disassociate the host reg from the guest reg.
4499 */
4500 /** @todo would be nice to know if preserving the register is in any way helpful. */
4501 /* If the purpose is calculations, try to duplicate the register value as
4502 we'll be clobbering the shadow. */
4503 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4504 && ( ~pReNative->Core.bmHstRegs
4505 & ~pReNative->Core.bmHstRegsWithGstShadow
4506 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4507 {
4508 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4509
4510 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4511
4512 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4513 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4514 g_apszIemNativeHstRegNames[idxRegNew]));
4515 idxReg = idxRegNew;
4516 }
4517 /* If the current register matches the restrictions, go ahead and allocate
4518 it for the caller. */
4519 else if (fRegMask & RT_BIT_32(idxReg))
4520 {
4521 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4522 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4523 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4524 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4525 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4526 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4527 else
4528 {
4529 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4530 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4531 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4532 }
4533 }
4534 /* Otherwise, allocate a register that satisfies the caller and transfer
4535 the shadowing if compatible with the intended use. (This basically
4536 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4537 else
4538 {
4539 Assert(fNoVolatileRegs);
4540 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4541 !fNoVolatileRegs
4542 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4543 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4544 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4545 {
4546 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4547 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4548 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4549 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4550 }
4551 else
4552 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4553 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4554 g_apszIemNativeHstRegNames[idxRegNew]));
4555 idxReg = idxRegNew;
4556 }
4557 }
4558 else
4559 {
4560 /*
4561 * Oops. Shadowed guest register already allocated!
4562 *
4563 * Allocate a new register, copy the value and, if updating, the
4564 * guest shadow copy assignment to the new register.
4565 */
4566 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4567 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4568 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4569 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4570
4571 /** @todo share register for readonly access. */
4572 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4573 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4574
4575 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4576 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4577
4578 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4579 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4580 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4581 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4582 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4583 else
4584 {
4585 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4586 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4587 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4588 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4589 }
4590 idxReg = idxRegNew;
4591 }
4592 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4593
4594#ifdef VBOX_STRICT
4595 /* Strict builds: Check that the value is correct. */
4596 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4597#endif
4598
4599 return idxReg;
4600 }
4601
4602 /*
4603 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4604 */
4605 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4606
4607 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4608 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4609
4610 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4611 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4612 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4613 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4614
4615 return idxRegNew;
4616}
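
/* Illustrative sketch: allocating a host register shadowing the guest PC for
   an update.  The wrapper and the "modify" step are hypothetical;
   kIemNativeGstReg_Pc and kIemNativeGstRegUse_ForUpdate are values used in this file. */
#if 0
static uint32_t iemNativeExampleUpdatePc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Returns a host register holding the guest PC, emitting a load only if no shadow copy exists. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* ... emit code that modifies the value in idxPcReg; the shadow association is kept ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif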
4617
4618
4619/**
4620 * Allocates a temporary host general purpose register that already holds the
4621 * given guest register value.
4622 *
4623 * The use case for this function is places where the shadowing state cannot be
4624 * modified due to branching and such. This will fail if we don't have a
4625 * current shadow copy handy or if it's incompatible. The only code that will
4626 * be emitted here is value checking code in strict builds.
4627 *
4628 * The intended use can only be readonly!
4629 *
4630 * @returns The host register number, UINT8_MAX if not present.
4631 * @param pReNative The native recompile state.
4632 * @param poff Pointer to the instruction buffer offset.
4633 * Will be updated in strict builds if a register is
4634 * found.
4635 * @param enmGstReg The guest register that is to be fetched.
4636 * @note In strict builds, this may throw instruction buffer growth failures.
4637 * Non-strict builds will not throw anything.
4638 * @sa iemNativeRegAllocTmpForGuestReg
4639 */
4640DECL_HIDDEN_THROW(uint8_t)
4641iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4642{
4643 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4644#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4645 AssertMsg( pReNative->idxCurCall == 0
4646 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4647 || enmGstReg == kIemNativeGstReg_Pc,
4648 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4649#endif
4650
4651 /*
4652 * First check if the guest register value is already in a host register.
4653 */
4654 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4655 {
4656 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4657 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4658 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4659 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4660
4661 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4662 {
4663 /*
4664 * We only do readonly use here, so easy compared to the other
4665 * variant of this code.
4666 */
4667 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4668 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4669 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4670 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4671 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4672
4673#ifdef VBOX_STRICT
4674 /* Strict builds: Check that the value is correct. */
4675 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4676#else
4677 RT_NOREF(poff);
4678#endif
4679 return idxReg;
4680 }
4681 }
4682
4683 return UINT8_MAX;
4684}
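
/* A sketch of opportunistic read-only access when the shadowing state must not
   be changed (e.g. in conditionally executed code).  The wrapper and the
   fallback branch are hypothetical. */
#if 0
static uint32_t iemNativeExampleReadPcIfShadowed(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... emit code reading idxPcReg ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
    {
        /* No usable shadow copy; load the value from CPUMCTX instead. */
    }
    return off;
}
#endif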
4685
4686
4687/**
4688 * Allocates argument registers for a function call.
4689 *
4690 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4691 * need to check the return value.
4692 * @param pReNative The native recompile state.
4693 * @param off The current code buffer offset.
4694 * @param cArgs The number of arguments the function call takes.
4695 */
4696DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4697{
4698 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4699 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4700 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4701 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4702
4703 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4704 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4705 else if (cArgs == 0)
4706 return true;
4707
4708 /*
4709 * Do we get lucky and all registers are free and not shadowing anything?
4710 */
4711 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4712 for (uint32_t i = 0; i < cArgs; i++)
4713 {
4714 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4715 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4716 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4717 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4718 }
4719 /*
4720 * Okay, not lucky so we have to free up the registers.
4721 */
4722 else
4723 for (uint32_t i = 0; i < cArgs; i++)
4724 {
4725 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4726 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4727 {
4728 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4729 {
4730 case kIemNativeWhat_Var:
4731 {
4732 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4733 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4734 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4735 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4736 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4737#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4738 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4739#endif
4740
4741 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4742 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4743 else
4744 {
4745 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4746 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4747 }
4748 break;
4749 }
4750
4751 case kIemNativeWhat_Tmp:
4752 case kIemNativeWhat_Arg:
4753 case kIemNativeWhat_rc:
4754 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4755 default:
4756 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4757 }
4758
4759 }
4760 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4761 {
4762 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4763 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4764 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4765 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4766 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4767 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4768 }
4769 else
4770 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4771 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4772 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4773 }
4774 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4775 return true;
4776}
4777
4778
4779DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4780
4781
4782#if 0
4783/**
4784 * Frees a register assignment of any type.
4785 *
4786 * @param pReNative The native recompile state.
4787 * @param idxHstReg The register to free.
4788 *
4789 * @note Does not update variables.
4790 */
4791DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4792{
4793 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4794 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4795 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4796 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4797 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4798 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4799 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4800 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4801 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4802 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4803 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4804 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4805 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4806 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4807
4808 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4809 /* no flushing, right:
4810 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4811 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4812 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4813 */
4814}
4815#endif
4816
4817
4818/**
4819 * Frees a temporary register.
4820 *
4821 * Any shadow copies of guest registers assigned to the host register will not
4822 * be flushed by this operation.
4823 */
4824DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4825{
4826 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4827 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4828 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4829 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4830 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4831}
4832
4833
4834/**
4835 * Frees a temporary immediate register.
4836 *
4837 * It is assumed that the caller has not modified the register, so it still holds
4838 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4839 */
4840DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4841{
4842 iemNativeRegFreeTmp(pReNative, idxHstReg);
4843}
4844
4845
4846/**
4847 * Frees a register assigned to a variable.
4848 *
4849 * The register will be disassociated from the variable.
4850 */
4851DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4852{
4853 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4854 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4855 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4856 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4857 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4859 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4860#endif
4861
4862 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4863 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4864 if (!fFlushShadows)
4865 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4866 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4867 else
4868 {
4869 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4870 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4871 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4872 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4873 uint64_t fGstRegShadows = fGstRegShadowsOld;
4874 while (fGstRegShadows)
4875 {
4876 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4877 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4878
4879 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4880 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4881 }
4882 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4883 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4884 }
4885}
4886
4887
4888#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4889# ifdef LOG_ENABLED
4890/** Host CPU SIMD register names. */
4891DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4892{
4893# ifdef RT_ARCH_AMD64
4894 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4895 # elif defined(RT_ARCH_ARM64)
4896 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4897 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4898# else
4899# error "port me"
4900# endif
4901};
4902# endif
4903
4904
4905/**
4906 * Frees a SIMD register assigned to a variable.
4907 *
4908 * The register will be disassociated from the variable.
4909 */
4910DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4911{
4912 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4913 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4914 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4915 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4916 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4917 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4918
4919 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4920 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4921 if (!fFlushShadows)
4922 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4923 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4924 else
4925 {
4926 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4927 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4928 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4929 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4930 uint64_t fGstRegShadows = fGstRegShadowsOld;
4931 while (fGstRegShadows)
4932 {
4933 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4934 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4935
4936 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4937 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4938 }
4939 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4940 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4941 }
4942}
4943#endif
4944
4945
4946/**
4947 * Called right before emitting a call instruction to move anything important
4948 * out of call-volatile registers, free and flush the call-volatile registers,
4949 * optionally freeing argument variables.
4950 *
4951 * @returns New code buffer offset, UINT32_MAX on failure.
4952 * @param pReNative The native recompile state.
4953 * @param off The code buffer offset.
4954 * @param cArgs The number of arguments the function call takes.
4955 * It is presumed that the host register part of these have
4956 * been allocated as such already and won't need moving,
4957 * just freeing.
4958 * @param fKeepVars Mask of variables that should keep their register
4959 * assignments. Caller must take care to handle these.
4960 */
4961DECL_HIDDEN_THROW(uint32_t)
4962iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4963{
4964 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4965
4966 /* fKeepVars will reduce this mask. */
4967 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4968
4969 /*
4970 * Move anything important out of volatile registers.
4971 */
4972 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4973 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4974 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4975#ifdef IEMNATIVE_REG_FIXED_TMP0
4976 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4977#endif
4978#ifdef IEMNATIVE_REG_FIXED_TMP1
4979 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4980#endif
4981#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4982 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4983#endif
4984 & ~g_afIemNativeCallRegs[cArgs];
4985
4986 fRegsToMove &= pReNative->Core.bmHstRegs;
4987 if (!fRegsToMove)
4988 { /* likely */ }
4989 else
4990 {
4991 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4992 while (fRegsToMove != 0)
4993 {
4994 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4995 fRegsToMove &= ~RT_BIT_32(idxReg);
4996
4997 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4998 {
4999 case kIemNativeWhat_Var:
5000 {
5001 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
5002 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5003 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5004 Assert(pVar->idxReg == idxReg);
5005 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5006 {
5007 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
5008 idxVar, pVar->enmKind, pVar->idxReg));
5009 if (pVar->enmKind != kIemNativeVarKind_Stack)
5010 pVar->idxReg = UINT8_MAX;
5011 else
5012 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5013 }
5014 else
5015 fRegsToFree &= ~RT_BIT_32(idxReg);
5016 continue;
5017 }
5018
5019 case kIemNativeWhat_Arg:
5020 AssertMsgFailed(("What?!?: %u\n", idxReg));
5021 continue;
5022
5023 case kIemNativeWhat_rc:
5024 case kIemNativeWhat_Tmp:
5025 AssertMsgFailed(("Missing free: %u\n", idxReg));
5026 continue;
5027
5028 case kIemNativeWhat_FixedTmp:
5029 case kIemNativeWhat_pVCpuFixed:
5030 case kIemNativeWhat_pCtxFixed:
5031 case kIemNativeWhat_PcShadow:
5032 case kIemNativeWhat_FixedReserved:
5033 case kIemNativeWhat_Invalid:
5034 case kIemNativeWhat_End:
5035 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5036 }
5037 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5038 }
5039 }
5040
5041 /*
5042 * Do the actual freeing.
5043 */
5044 if (pReNative->Core.bmHstRegs & fRegsToFree)
5045 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5046 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5047 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5048
5049 /* If there are guest register shadows in any call-volatile register, we
5050 have to clear the corresponding guest register masks for each register. */
5051 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5052 if (fHstRegsWithGstShadow)
5053 {
5054 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5055 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5056 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5057 do
5058 {
5059 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5060 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5061
5062 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5063 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5064 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5065 } while (fHstRegsWithGstShadow != 0);
5066 }
5067
5068 return off;
5069}
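
/* A minimal sketch of what a helper-call emitter does right before emitting
   the call instruction; two register arguments are assumed here and
   pReNative/off come from the surrounding emitter. */
#if 0
    /* Spill/flush everything living in call-volatile registers, except the two argument registers. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
    /* ... load the argument registers and emit the call ... */
#endif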
5070
5071
5072/**
5073 * Flushes a set of guest register shadow copies.
5074 *
5075 * This is usually done after calling a threaded function or a C-implementation
5076 * of an instruction.
5077 *
5078 * @param pReNative The native recompile state.
5079 * @param fGstRegs Set of guest registers to flush.
5080 */
5081DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5082{
5083 /*
5084 * Reduce the mask by what's currently shadowed
5085 */
5086 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5087 fGstRegs &= bmGstRegShadowsOld;
5088 if (fGstRegs)
5089 {
5090 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5091 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5092 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5093 if (bmGstRegShadowsNew)
5094 {
5095 /*
5096 * Partial.
5097 */
5098 do
5099 {
5100 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5101 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5102 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5103 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5104 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5105
5106 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5107 fGstRegs &= ~fInThisHstReg;
5108 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5109 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5110 if (!fGstRegShadowsNew)
5111 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5112 } while (fGstRegs != 0);
5113 }
5114 else
5115 {
5116 /*
5117 * Clear all.
5118 */
5119 do
5120 {
5121 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5122 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5123 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5124 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5125 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5126
5127 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5128 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5129 } while (fGstRegs != 0);
5130 pReNative->Core.bmHstRegsWithGstShadow = 0;
5131 }
5132 }
5133}
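
/* Illustrative sketch: after emitting a call to a C helper that may have
   written the guest PC, the now stale shadow copy is dropped.  pReNative comes
   from the surrounding emitter. */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
#endif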
5134
5135
5136/**
5137 * Flushes guest register shadow copies held by a set of host registers.
5138 *
5139 * This is used with the TLB lookup code for ensuring that we don't carry on
5140 * with any guest shadows in volatile registers, as these will get corrupted by
5141 * a TLB miss.
5142 *
5143 * @param pReNative The native recompile state.
5144 * @param fHstRegs Set of host registers to flush guest shadows for.
5145 */
5146DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5147{
5148 /*
5149 * Reduce the mask by what's currently shadowed.
5150 */
5151 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5152 fHstRegs &= bmHstRegsWithGstShadowOld;
5153 if (fHstRegs)
5154 {
5155 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5156 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5157 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5158 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5159 if (bmHstRegsWithGstShadowNew)
5160 {
5161 /*
5162 * Partial (likely).
5163 */
5164 uint64_t fGstShadows = 0;
5165 do
5166 {
5167 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5168 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5169 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5170 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5171
5172 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5173 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5174 fHstRegs &= ~RT_BIT_32(idxHstReg);
5175 } while (fHstRegs != 0);
5176 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5177 }
5178 else
5179 {
5180 /*
5181 * Clear all.
5182 */
5183 do
5184 {
5185 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5186 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5187 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5188 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5189
5190 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5191 fHstRegs &= ~RT_BIT_32(idxHstReg);
5192 } while (fHstRegs != 0);
5193 pReNative->Core.bmGstRegShadows = 0;
5194 }
5195 }
5196}
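
/* Sketch of the documented use case: the TLB lookup path drops all guest
   shadows living in call-volatile host registers before branching to the miss
   handler, since the helper will clobber those registers. */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif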
5197
5198
5199/**
5200 * Restores guest shadow copies in volatile registers.
5201 *
5202 * This is used after calling a helper function (think TLB miss) to restore the
5203 * register state of volatile registers.
5204 *
5205 * @param pReNative The native recompile state.
5206 * @param off The code buffer offset.
5207 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5208 * be active (allocated) w/o asserting. Hack.
5209 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5210 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5211 */
5212DECL_HIDDEN_THROW(uint32_t)
5213iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5214{
5215 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5216 if (fHstRegs)
5217 {
5218 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5219 do
5220 {
5221 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5222
5223 /* It's not fatal if a register is active holding a variable that
5224 shadows a guest register, ASSUMING all pending guest register
5225 writes were flushed prior to the helper call. However, we'll be
5226 emitting duplicate restores, so it wastes code space. */
5227 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5228 RT_NOREF(fHstRegsActiveShadows);
5229
5230 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5231 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5232 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5233 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5234
5235 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5236 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5237
5238 fHstRegs &= ~RT_BIT_32(idxHstReg);
5239 } while (fHstRegs != 0);
5240 }
5241 return off;
5242}
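
/* A minimal sketch, assuming all pending guest register writes were flushed
   before the call: after the emitted helper returns, the volatile host
   registers still carry their shadow associations but hold unknown values, so
   they are reloaded from CPUMCTX. */
#if 0
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif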
5243
5244
5245
5246
5247/*********************************************************************************************************************************
5248* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5249*********************************************************************************************************************************/
5250#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5251
5252/**
5253 * Info about shadowed guest SIMD register values.
5254 * @see IEMNATIVEGSTSIMDREG
5255 */
5256static struct
5257{
5258 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5259 uint32_t offXmm;
5260 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5261 uint32_t offYmm;
5262 /** Name (for logging). */
5263 const char *pszName;
5264} const g_aGstSimdShadowInfo[] =
5265{
5266#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5267 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5268 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5269 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5270 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5271 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5272 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5273 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5274 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5275 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5276 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5277 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5278 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5279 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5280 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5281 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5282 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5283 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5284#undef CPUMCTX_OFF_AND_SIZE
5285};
5286AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5287
5288
5289/**
5290 * Frees a temporary SIMD register.
5291 *
5292 * Any shadow copies of guest registers assigned to the host register will not
5293 * be flushed by this operation.
5294 */
5295DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5296{
5297 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5298 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5299 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5300 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5301 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5302}
5303
5304
5305/**
5306 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5307 *
5308 * @returns New code buffer offset.
5309 * @param pReNative The native recompile state.
5310 * @param off Current code buffer position.
5311 * @param enmGstSimdReg The guest SIMD register to flush.
5312 */
5313DECL_HIDDEN_THROW(uint32_t)
5314iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5315{
5316 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5317
5318 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5319 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5320 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5321 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5322
5323 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5324 {
5325 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5326 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5327 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5328 }
5329
5330 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5331 {
5332 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5333 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5334 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5335 }
5336
5337 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5338 return off;
5339}
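
/* Illustrative sketch: writing back any dirty halves of guest ymm0 before
   emitted code (or a helper) reads the register from CPUMCTX; the register
   index is an arbitrary example. */
#if 0
    off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
#endif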
5340
5341
5342/**
5343 * Locate a register, possibly freeing one up.
5344 *
5345 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5346 * failed.
5347 *
5348 * @returns Host register number on success. Returns UINT8_MAX if no registers
5349 * are found; the caller is supposed to deal with this and raise an
5350 * allocation type specific status code (if desired).
5351 *
5352 * @throws VBox status code if we run into trouble spilling a variable or
5353 * recording debug info. Does NOT throw anything if we're out of
5354 * registers, though.
5355 */
5356static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5357 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5358{
5359 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5360 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5361 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5362
5363 /*
5364 * Try a freed register that's shadowing a guest register.
5365 */
5366 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5367 if (fRegs)
5368 {
5369 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5370
5371#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5372 /*
5373 * When we have liveness information, we use it to kick out all shadowed
5374 * guest registers that will not be needed any more in this TB. If we're
5375 * lucky, this may prevent us from ending up here again.
5376 *
5377 * Note! We must consider the previous entry here so we don't free
5378 * anything that the current threaded function requires (current
5379 * entry is produced by the next threaded function).
5380 */
5381 uint32_t const idxCurCall = pReNative->idxCurCall;
5382 if (idxCurCall > 0)
5383 {
5384 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5385
5386# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5387 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5388 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5389 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5390#else
5391 /* Construct a mask of the registers not in the read or write state.
5392 Note! We could skip writes, if they aren't from us, as this is just
5393 a hack to prevent trashing registers that have just been written
5394 or will be written when we retire the current instruction. */
5395 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5396 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5397 & IEMLIVENESSBIT_MASK;
5398#endif
5399 /* If it matches any shadowed registers. */
5400 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5401 {
5402 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5403 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5404 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5405
5406 /* See if we've got any unshadowed registers we can return now. */
5407 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5408 if (fUnshadowedRegs)
5409 {
5410 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5411 return (fPreferVolatile
5412 ? ASMBitFirstSetU32(fUnshadowedRegs)
5413 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5414 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5415 - 1;
5416 }
5417 }
5418 }
5419#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5420
5421 unsigned const idxReg = (fPreferVolatile
5422 ? ASMBitFirstSetU32(fRegs)
5423 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5424 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5425 - 1;
5426
5427 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5428 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5429 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5430 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5431
5432 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5433 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5434 uint32_t idxGstSimdReg = 0;
5435 do
5436 {
5437 if (fGstRegShadows & 0x1)
5438 {
5439 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5440 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5441 }
5442 idxGstSimdReg++;
5443 fGstRegShadows >>= 1;
5444 } while (fGstRegShadows);
5445
5446 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5447 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5448 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5449 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5450 return idxReg;
5451 }
5452
5453 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5454
5455 /*
5456 * Try free up a variable that's in a register.
5457 *
5458 * We do two rounds here, first evacuating variables that don't need to be
5459 * saved on the stack, then in the second round moving things to the stack.
5460 */
5461 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5462 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5463 {
5464 uint32_t fVars = pReNative->Core.bmVars;
5465 while (fVars)
5466 {
5467 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5468 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5469 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non-SIMD variables here. */
5470 { fVars &= ~RT_BIT_32(idxVar); continue; /* clear the bit first or the loop would never advance */ }
5471
5472 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5473 && (RT_BIT_32(idxReg) & fRegMask)
5474 && ( iLoop == 0
5475 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5476 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5477 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5478 {
5479 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5480 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5481 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5482 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5483 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5484 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5485
5486 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5487 {
5488 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5489 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5490 }
5491
5492 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5493 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5494
5495 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5496 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5497 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5498 return idxReg;
5499 }
5500 fVars &= ~RT_BIT_32(idxVar);
5501 }
5502 }
5503
5504 AssertFailed();
5505 return UINT8_MAX;
5506}
5507
5508
5509/**
5510 * Flushes a set of guest register shadow copies.
5511 *
5512 * This is usually done after calling a threaded function or a C-implementation
5513 * of an instruction.
5514 *
5515 * @param pReNative The native recompile state.
5516 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5517 */
5518DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5519{
5520 /*
5521 * Reduce the mask by what's currently shadowed
5522 */
5523 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5524 fGstSimdRegs &= bmGstSimdRegShadows;
5525 if (fGstSimdRegs)
5526 {
5527 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5528 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5529 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5530 if (bmGstSimdRegShadowsNew)
5531 {
5532 /*
5533 * Partial.
5534 */
5535 do
5536 {
5537 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5538 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5539 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5540 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5541 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5542 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5543
5544 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5545 fGstSimdRegs &= ~fInThisHstReg;
5546 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5547 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5548 if (!fGstRegShadowsNew)
5549 {
5550 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5551 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5552 }
5553 } while (fGstSimdRegs != 0);
5554 }
5555 else
5556 {
5557 /*
5558 * Clear all.
5559 */
5560 do
5561 {
5562 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5563 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5564 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5565 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5566 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5567 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5568
5569 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5570 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5571 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5572 } while (fGstSimdRegs != 0);
5573 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5574 }
5575 }
5576}
5577
5578
5579/**
5580 * Allocates a temporary host SIMD register.
5581 *
5582 * This may emit code to save register content onto the stack in order to free
5583 * up a register.
5584 *
5585 * @returns The host register number; throws VBox status code on failure,
5586 * so no need to check the return value.
5587 * @param pReNative The native recompile state.
5588 * @param poff Pointer to the variable with the code buffer position.
5589 * This will be updated if we need to move a variable from
5590 * register to stack in order to satisfy the request.
5591 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5592 * registers (@c true, default) or the other way around
5593 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5594 */
5595DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5596{
5597 /*
5598 * Try to find a completely unused register, preferably a call-volatile one.
5599 */
5600 uint8_t idxSimdReg;
5601 uint32_t fRegs = ~pReNative->Core.bmHstRegs
5602 & ~pReNative->Core.bmHstRegsWithGstShadow
5603 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5604 if (fRegs)
5605 {
5606 if (fPreferVolatile)
5607 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5608 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5609 else
5610 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5611 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5612 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5613 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5614
5615 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5616 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5617 }
5618 else
5619 {
5620 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5621 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5622 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5623 }
5624
5625 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5626 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5627}
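
/* A minimal sketch: a scratch SIMD register for an intermediate result,
   released once the emitted code no longer needs it. */
#if 0
    uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    /* ... emit SIMD instructions using idxSimdTmp ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
#endif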
5628
5629
5630/**
5631 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5632 * registers.
5633 *
5634 * @returns The host register number; throws VBox status code on failure,
5635 * so no need to check the return value.
5636 * @param pReNative The native recompile state.
5637 * @param poff Pointer to the variable with the code buffer position.
5638 * This will be updated if we need to move a variable from
5639 * register to stack in order to satisfy the request.
5640 * @param fRegMask Mask of acceptable registers.
5641 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5642 * registers (@c true, default) or the other way around
5643 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5644 */
5645DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5646 bool fPreferVolatile /*= true*/)
5647{
5648 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5649 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5650
5651 /*
5652 * Try to find a completely unused register, preferably a call-volatile one.
5653 */
5654 uint8_t idxSimdReg;
5655 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5656 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5657 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5658 & fRegMask;
5659 if (fRegs)
5660 {
5661 if (fPreferVolatile)
5662 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5663 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5664 else
5665 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5666 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5667 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5668 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5669
5670 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5671 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5672 }
5673 else
5674 {
5675 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5676 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5677 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5678 }
5679
5680 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5681 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5682}
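
/* Illustrative sketch: restricting the allocation to non-volatile SIMD
   registers so the value survives a later helper call; the mask mirrors the
   fNoVolatileRegs case used elsewhere in this file. */
#if 0
    uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmpEx(pReNative, &off,
                                                          IEMNATIVE_HST_SIMD_REG_MASK
                                                          & ~IEMNATIVE_SIMD_REG_FIXED_MASK
                                                          & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK,
                                                          false /*fPreferVolatile*/);
    /* ... emit SIMD instructions using idxSimdTmp ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
#endif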
5683
5684
5685/**
5686 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5687 *
5688 * @param pReNative The native recompile state.
5689 * @param idxHstSimdReg The host SIMD register to update the state for.
5690 * @param enmLoadSz The load size to set.
5691 */
5692DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5693 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5694{
5695 /* Everything valid already? -> nothing to do. */
5696 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5697 return;
5698
5699 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5700 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5701 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5702 {
5703 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5704 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5705 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5706 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5707 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5708 }
5709}
5710
5711
5712static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5713 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5714{
5715 /* Easy case first: either the destination loads the same range as the source has already loaded, or the source has loaded everything. */
5716 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5717 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5718 {
5719# ifdef RT_ARCH_ARM64
5720 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5721 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5722# endif
5723
5724 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5725 {
5726 switch (enmLoadSzDst)
5727 {
5728 case kIemNativeGstSimdRegLdStSz_256:
5729 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5730 break;
5731 case kIemNativeGstSimdRegLdStSz_Low128:
5732 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5733 break;
5734 case kIemNativeGstSimdRegLdStSz_High128:
5735 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5736 break;
5737 default:
5738 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5739 }
5740
5741 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5742 }
5743 }
5744 else
5745 {
5746 /* Complicated stuff where the source is currently missing something, later. */
5747 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5748 }
5749
5750 return off;
5751}
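/*
 * Added illustrative note (not part of the original source): on ARM64 a
 * 256-bit guest value is assumed to live in two adjacent host NEON registers,
 * which is why the even-index assertions above exist.  With made-up indexes
 * idxHstSimdRegDst=2 and idxHstSimdRegSrc=4, the High128 case copies host
 * register 3 from host register 5, Low128 copies 2 from 4, and the 256-bit
 * case copies both halves.
 */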
5752
5753
5754/**
5755 * Allocates a temporary host SIMD register for keeping a guest
5756 * SIMD register value.
5757 *
5758 * Since we may already have a register holding the guest register value,
5759 * code will be emitted to do the loading if that's not the case. Code may also
5760 * be emitted if we have to free up a register to satisfy the request.
5761 *
5762 * @returns The host register number; throws VBox status code on failure, so no
5763 * need to check the return value.
5764 * @param pReNative The native recompile state.
5765 * @param poff Pointer to the variable with the code buffer
5766 *                      position. This will be updated if we need to move a
5767 * variable from register to stack in order to satisfy
5768 * the request.
5769 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
5770 * @param enmIntendedUse How the caller will be using the host register.
5771 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5772 * register is okay (default). The ASSUMPTION here is
5773 * that the caller has already flushed all volatile
5774 * registers, so this is only applied if we allocate a
5775 * new register.
5776 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5777 */
5778DECL_HIDDEN_THROW(uint8_t)
5779iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5780 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5781 bool fNoVolatileRegs /*= false*/)
5782{
5783 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5784#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5785 AssertMsg( pReNative->idxCurCall == 0
5786 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5787 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5788 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5789 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5790 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5791 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5792#endif
5793#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5794 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5795#endif
5796 uint32_t const fRegMask = !fNoVolatileRegs
5797 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5798 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5799
5800 /*
5801 * First check if the guest register value is already in a host register.
5802 */
5803 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5804 {
5805 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5806 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5807 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5808 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5809
5810 /* It's not supposed to be allocated... */
5811 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5812 {
5813 /*
5814 * If the register will trash the guest shadow copy, try find a
5815 * completely unused register we can use instead. If that fails,
5816 * we need to disassociate the host reg from the guest reg.
5817 */
5818 /** @todo would be nice to know if preserving the register is in any way helpful. */
5819            /* If the purpose is calculations, try to duplicate the register value as
5820 we'll be clobbering the shadow. */
5821 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5822 && ( ~pReNative->Core.bmHstSimdRegs
5823 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5824 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5825 {
5826 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5827
5828 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5829
5830 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5831 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5832 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5833 idxSimdReg = idxRegNew;
5834 }
5835 /* If the current register matches the restrictions, go ahead and allocate
5836 it for the caller. */
5837 else if (fRegMask & RT_BIT_32(idxSimdReg))
5838 {
5839 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5840 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5841 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5842 {
5843 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5844 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5845 else
5846 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5847 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5848 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5849 }
5850 else
5851 {
5852 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5853 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5854 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5855 }
5856 }
5857 /* Otherwise, allocate a register that satisfies the caller and transfer
5858 the shadowing if compatible with the intended use. (This basically
5859 means the call wants a non-volatile register (RSP push/pop scenario).) */
5860 else
5861 {
5862 Assert(fNoVolatileRegs);
5863 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5864 !fNoVolatileRegs
5865 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5866 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5867 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5868 {
5869 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5870                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5871 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5872 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5873 }
5874 else
5875 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5876 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5877 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5878 idxSimdReg = idxRegNew;
5879 }
5880 }
5881 else
5882 {
5883 /*
5884 * Oops. Shadowed guest register already allocated!
5885 *
5886 * Allocate a new register, copy the value and, if updating, the
5887 * guest shadow copy assignment to the new register.
5888 */
5889 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5890 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5891 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5892 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5893
5894 /** @todo share register for readonly access. */
5895 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5896 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5897
5898 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5899 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5900 else
5901 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5902
5903 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5904 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5905 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5906 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5907 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5908 else
5909 {
5910 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5911 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5912 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5913 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5914 }
5915 idxSimdReg = idxRegNew;
5916 }
5917 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5918
5919#ifdef VBOX_STRICT
5920 /* Strict builds: Check that the value is correct. */
5921 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5922 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5923#endif
5924
5925 return idxSimdReg;
5926 }
5927
5928 /*
5929     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5930 */
5931 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5932
5933 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5934 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5935 else
5936 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5937
5938 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5939 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5940
5941    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5942 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5943
5944 return idxRegNew;
5945}
5946
5947#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5948
5949
5950
5951/*********************************************************************************************************************************
5952* Code emitters for flushing pending guest register writes and sanity checks *
5953*********************************************************************************************************************************/
5954
5955#ifdef VBOX_STRICT
5956/**
5957 * Does internal register allocator sanity checks.
5958 */
5959DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5960{
5961 /*
5962 * Iterate host registers building a guest shadowing set.
5963 */
5964 uint64_t bmGstRegShadows = 0;
5965 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5966 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5967 while (bmHstRegsWithGstShadow)
5968 {
5969 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5970 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5971 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5972
5973 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5974 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5975 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5976 bmGstRegShadows |= fThisGstRegShadows;
5977 while (fThisGstRegShadows)
5978 {
5979 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5980 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5981 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5982 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5983 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5984 }
5985 }
5986 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5987 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5988 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5989
5990 /*
5991 * Now the other way around, checking the guest to host index array.
5992 */
5993 bmHstRegsWithGstShadow = 0;
5994 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5995 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5996 while (bmGstRegShadows)
5997 {
5998 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5999 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6000 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6001
6002 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6003 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6004 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6005 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6006 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6007 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6008 }
6009 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6010 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6011 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6012}
6013#endif /* VBOX_STRICT */
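/*
 * Added illustrative note (not part of the original source): the sanity check
 * above verifies that the forward and reverse shadow maps agree.  For example,
 * if host register 3 shadows two guest registers, then aHstRegs[3].fGstRegShadows
 * has those two bits set, aidxGstRegShadows[] points back at 3 for both of them,
 * bit 3 is set in bmHstRegsWithGstShadow, and the two guest bits are set in
 * bmGstRegShadows; any mismatch triggers one of the assertions.
 */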
6014
6015
6016/**
6017 * Flushes any delayed guest register writes.
6018 *
6019 * This must be called prior to calling CImpl functions and any helpers that use
6020 * the guest state (like raising exceptions) and such.
6021 *
6022 * This optimization has not yet been implemented. The first target would be
6023 * RIP updates, since these are the most common ones.
6024 */
6025DECL_HIDDEN_THROW(uint32_t)
6026iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
6027{
6028#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6029 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
6030 off = iemNativeEmitPcWriteback(pReNative, off);
6031#else
6032 RT_NOREF(pReNative, fGstShwExcept);
6033#endif
6034
6035#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6036 /** @todo r=bird: There must be a quicker way to check if anything needs
6037     *        doing and then call the SIMD function to do the flushing */
6038 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6039 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6040 {
6041 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6042 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6043
6044 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6045 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
6046
6047 if ( fFlushShadows
6048 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6049 {
6050 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6051
6052 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6053 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6054 }
6055 }
6056#else
6057 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6058#endif
6059
6060 return off;
6061}
6062
6063
6064#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6065/**
6066 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6067 */
6068DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6069{
6070 Assert(pReNative->Core.offPc);
6071# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6072 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6073 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6074# endif
6075
6076# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6077 /* Allocate a temporary PC register. */
6078 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6079
6080 /* Perform the addition and store the result. */
6081 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6082 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6083
6084 /* Free but don't flush the PC register. */
6085 iemNativeRegFreeTmp(pReNative, idxPcReg);
6086# else
6087 /* Compare the shadow with the context value, they should match. */
6088 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6089 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6090# endif
6091
6092 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6093 pReNative->Core.offPc = 0;
6094 pReNative->Core.cInstrPcUpdateSkipped = 0;
6095
6096 return off;
6097}
6098#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
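/*
 * Added illustrative sketch (not part of the original source), assuming offPc
 * is accumulated elsewhere as instructions are skipped: if three 2-byte
 * instructions were recompiled without updating RIP, offPc would be 6 and
 * cInstrPcUpdateSkipped would be 3; the writeback above then emits a single
 * add of 6 to the cached PC register plus one store to cpum.GstCtx.rip instead
 * of three separate RIP updates.
 */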
6099
6100
6101/*********************************************************************************************************************************
6102* Code Emitters (larger snippets) *
6103*********************************************************************************************************************************/
6104
6105/**
6106 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6107 * extending to 64-bit width.
6108 *
6109 * @returns New code buffer offset on success, UINT32_MAX on failure.
6110 * @param   pReNative   The native recompile state.
6111 * @param off The current code buffer position.
6112 * @param idxHstReg The host register to load the guest register value into.
6113 * @param enmGstReg The guest register to load.
6114 *
6115 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6116 * that is something the caller needs to do if applicable.
6117 */
6118DECL_HIDDEN_THROW(uint32_t)
6119iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6120{
6121 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6122 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6123
6124 switch (g_aGstShadowInfo[enmGstReg].cb)
6125 {
6126 case sizeof(uint64_t):
6127 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6128 case sizeof(uint32_t):
6129 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6130 case sizeof(uint16_t):
6131 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6132#if 0 /* not present in the table. */
6133 case sizeof(uint8_t):
6134 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6135#endif
6136 default:
6137 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6138 }
6139}
6140
6141
6142#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6143/**
6144 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6145 *
6146 * @returns New code buffer offset on success, UINT32_MAX on failure.
6147 * @param pReNative The recompiler state.
6148 * @param off The current code buffer position.
6149 * @param idxHstSimdReg The host register to load the guest register value into.
6150 * @param enmGstSimdReg The guest register to load.
6151 * @param enmLoadSz The load size of the register.
6152 *
6153 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6154 * that is something the caller needs to do if applicable.
6155 */
6156DECL_HIDDEN_THROW(uint32_t)
6157iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6158 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6159{
6160 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6161
6162 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6163 switch (enmLoadSz)
6164 {
6165 case kIemNativeGstSimdRegLdStSz_256:
6166 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6167 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6168 case kIemNativeGstSimdRegLdStSz_Low128:
6169 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6170 case kIemNativeGstSimdRegLdStSz_High128:
6171 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6172 default:
6173 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6174 }
6175}
6176#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6177
6178#ifdef VBOX_STRICT
6179
6180/**
6181 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6182 *
6183 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6184 * Trashes EFLAGS on AMD64.
6185 */
6186DECL_HIDDEN_THROW(uint32_t)
6187iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6188{
6189# ifdef RT_ARCH_AMD64
6190 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6191
6192 /* rol reg64, 32 */
6193 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6194 pbCodeBuf[off++] = 0xc1;
6195 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6196 pbCodeBuf[off++] = 32;
6197
6198 /* test reg32, ffffffffh */
6199 if (idxReg >= 8)
6200 pbCodeBuf[off++] = X86_OP_REX_B;
6201 pbCodeBuf[off++] = 0xf7;
6202 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6203 pbCodeBuf[off++] = 0xff;
6204 pbCodeBuf[off++] = 0xff;
6205 pbCodeBuf[off++] = 0xff;
6206 pbCodeBuf[off++] = 0xff;
6207
6208 /* je/jz +1 */
6209 pbCodeBuf[off++] = 0x74;
6210 pbCodeBuf[off++] = 0x01;
6211
6212 /* int3 */
6213 pbCodeBuf[off++] = 0xcc;
6214
6215 /* rol reg64, 32 */
6216 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6217 pbCodeBuf[off++] = 0xc1;
6218 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6219 pbCodeBuf[off++] = 32;
6220
6221# elif defined(RT_ARCH_ARM64)
6222 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6223 /* lsr tmp0, reg64, #32 */
6224 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6225 /* cbz tmp0, +1 */
6226 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6227 /* brk #0x1100 */
6228 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6229
6230# else
6231# error "Port me!"
6232# endif
6233 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6234 return off;
6235}
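/*
 * Added worked example (not part of the original source) for the AMD64
 * sequence above, using made-up register values:
 *
 *   reg = 0x00000000deadbeef: rol swaps the halves, the test of the (now zero)
 *       low 32 bits sets ZF, the jz skips the int3 and the second rol restores
 *       the original value.
 *   reg = 0x0000000180000000: after the rol the low 32 bits are 0x00000001,
 *       the test is non-zero and the int3 breakpoint fires.
 */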
6236
6237
6238/**
6239 * Emitting code that checks that the content of register @a idxReg is the same
6240 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6241 * instruction if that's not the case.
6242 *
6243 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6244 * Trashes EFLAGS on AMD64.
6245 */
6246DECL_HIDDEN_THROW(uint32_t)
6247iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6248{
6249# ifdef RT_ARCH_AMD64
6250 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6251
6252 /* cmp reg, [mem] */
6253 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6254 {
6255 if (idxReg >= 8)
6256 pbCodeBuf[off++] = X86_OP_REX_R;
6257 pbCodeBuf[off++] = 0x38;
6258 }
6259 else
6260 {
6261 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6262 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6263 else
6264 {
6265 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6266 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6267 else
6268 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6269 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6270 if (idxReg >= 8)
6271 pbCodeBuf[off++] = X86_OP_REX_R;
6272 }
6273 pbCodeBuf[off++] = 0x39;
6274 }
6275 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6276
6277 /* je/jz +1 */
6278 pbCodeBuf[off++] = 0x74;
6279 pbCodeBuf[off++] = 0x01;
6280
6281 /* int3 */
6282 pbCodeBuf[off++] = 0xcc;
6283
6284 /* For values smaller than the register size, we must check that the rest
6285 of the register is all zeros. */
6286 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6287 {
6288 /* test reg64, imm32 */
6289 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6290 pbCodeBuf[off++] = 0xf7;
6291 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6292 pbCodeBuf[off++] = 0;
6293 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6294 pbCodeBuf[off++] = 0xff;
6295 pbCodeBuf[off++] = 0xff;
6296
6297 /* je/jz +1 */
6298 pbCodeBuf[off++] = 0x74;
6299 pbCodeBuf[off++] = 0x01;
6300
6301 /* int3 */
6302 pbCodeBuf[off++] = 0xcc;
6303 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6304 }
6305 else
6306 {
6307 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6308 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6309 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6310 }
6311
6312# elif defined(RT_ARCH_ARM64)
6313 /* mov TMP0, [gstreg] */
6314 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6315
6316 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6317 /* sub tmp0, tmp0, idxReg */
6318 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6319 /* cbz tmp0, +1 */
6320 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6321 /* brk #0x1000+enmGstReg */
6322 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6323 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6324
6325# else
6326# error "Port me!"
6327# endif
6328 return off;
6329}
6330
6331
6332# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6333# ifdef RT_ARCH_AMD64
6334/**
6335 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6336 */
6337DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6338{
6339 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6340 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6341 if (idxSimdReg >= 8)
6342 pbCodeBuf[off++] = X86_OP_REX_R;
6343 pbCodeBuf[off++] = 0x0f;
6344 pbCodeBuf[off++] = 0x38;
6345 pbCodeBuf[off++] = 0x29;
6346 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6347
6348 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6349 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6350 pbCodeBuf[off++] = X86_OP_REX_W
6351 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6352 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6353 pbCodeBuf[off++] = 0x0f;
6354 pbCodeBuf[off++] = 0x3a;
6355 pbCodeBuf[off++] = 0x16;
6356 pbCodeBuf[off++] = 0xeb;
6357 pbCodeBuf[off++] = 0x00;
6358
6359 /* cmp tmp0, 0xffffffffffffffff. */
6360 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6361 pbCodeBuf[off++] = 0x83;
6362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6363 pbCodeBuf[off++] = 0xff;
6364
6365 /* je/jz +1 */
6366 pbCodeBuf[off++] = 0x74;
6367 pbCodeBuf[off++] = 0x01;
6368
6369 /* int3 */
6370 pbCodeBuf[off++] = 0xcc;
6371
6372 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6373 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6374 pbCodeBuf[off++] = X86_OP_REX_W
6375 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6376 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6377 pbCodeBuf[off++] = 0x0f;
6378 pbCodeBuf[off++] = 0x3a;
6379 pbCodeBuf[off++] = 0x16;
6380 pbCodeBuf[off++] = 0xeb;
6381 pbCodeBuf[off++] = 0x01;
6382
6383 /* cmp tmp0, 0xffffffffffffffff. */
6384 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6385 pbCodeBuf[off++] = 0x83;
6386 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6387 pbCodeBuf[off++] = 0xff;
6388
6389 /* je/jz +1 */
6390 pbCodeBuf[off++] = 0x74;
6391 pbCodeBuf[off++] = 0x01;
6392
6393 /* int3 */
6394 pbCodeBuf[off++] = 0xcc;
6395
6396 return off;
6397}
6398# endif
6399
6400
6401/**
6402 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6403 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6404 * instruction if that's not the case.
6405 *
6406 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6407 * Trashes EFLAGS on AMD64.
6408 */
6409DECL_HIDDEN_THROW(uint32_t)
6410iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6411 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6412{
6413    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6414 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6415 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6416 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6417 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6418 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6419 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6420 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6421 return off;
6422
6423# ifdef RT_ARCH_AMD64
6424 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6425 {
6426 /* movdqa vectmp0, idxSimdReg */
6427 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6428
6429 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6430
6431 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6432 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6433 }
6434
6435 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6436 {
6437        /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6438 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6439
6440 /* vextracti128 vectmp0, idxSimdReg, 1 */
6441 pbCodeBuf[off++] = X86_OP_VEX3;
6442 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6443 | X86_OP_VEX3_BYTE1_X
6444 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6445 | 0x03; /* Opcode map */
6446 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6447 pbCodeBuf[off++] = 0x39;
6448 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6449 pbCodeBuf[off++] = 0x01;
6450
6451 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6452 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6453 }
6454# elif defined(RT_ARCH_ARM64)
6455 /* mov vectmp0, [gstreg] */
6456 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6457
6458 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6459 {
6460 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6461 /* eor vectmp0, vectmp0, idxSimdReg */
6462 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6463 /* uaddlv vectmp0, vectmp0.16B */
6464 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6465 /* umov tmp0, vectmp0.H[0] */
6466 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6467 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6468 /* cbz tmp0, +1 */
6469 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6470 /* brk #0x1000+enmGstReg */
6471 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6472 }
6473
6474 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6475 {
6476 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6477 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6478 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6479 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6480 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6481 /* umov tmp0, (vectmp0 + 1).H[0] */
6482 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6483 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6484 /* cbz tmp0, +1 */
6485 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6486 /* brk #0x1000+enmGstReg */
6487 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6488 }
6489
6490# else
6491# error "Port me!"
6492# endif
6493
6494 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6495 return off;
6496}
6497# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6498
6499
6500/**
6501 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6502 * important bits.
6503 *
6504 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6505 * Trashes EFLAGS on AMD64.
6506 */
6507DECL_HIDDEN_THROW(uint32_t)
6508iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6509{
6510 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6511 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6512 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6513 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6514
6515#ifdef RT_ARCH_AMD64
6516 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6517
6518 /* je/jz +1 */
6519 pbCodeBuf[off++] = 0x74;
6520 pbCodeBuf[off++] = 0x01;
6521
6522 /* int3 */
6523 pbCodeBuf[off++] = 0xcc;
6524
6525# elif defined(RT_ARCH_ARM64)
6526 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6527
6528 /* b.eq +1 */
6529 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6530 /* brk #0x2000 */
6531 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6532
6533# else
6534# error "Port me!"
6535# endif
6536 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6537
6538 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6539 return off;
6540}
6541
6542#endif /* VBOX_STRICT */
6543
6544
6545#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6546/**
6547 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6548 */
6549DECL_HIDDEN_THROW(uint32_t)
6550iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6551{
6552 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6553
6554 fEflNeeded &= X86_EFL_STATUS_BITS;
6555 if (fEflNeeded)
6556 {
6557# ifdef RT_ARCH_AMD64
6558 /* test dword [pVCpu + offVCpu], imm32 */
6559 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6560 if (fEflNeeded <= 0xff)
6561 {
6562 pCodeBuf[off++] = 0xf6;
6563 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6564 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6565 }
6566 else
6567 {
6568 pCodeBuf[off++] = 0xf7;
6569 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6570 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6571 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6572 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6573 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6574 }
6575 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6576
6577# else
6578 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6579 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6580 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6581# ifdef RT_ARCH_ARM64
6582 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6583 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6584# else
6585# error "Port me!"
6586# endif
6587 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6588# endif
6589 }
6590 return off;
6591}
6592#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6593
6594
6595/**
6596 * Emits code for checking the return code of a call and rcPassUp, returning
6597 * from the code if either is non-zero.
6598 */
6599DECL_HIDDEN_THROW(uint32_t)
6600iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6601{
6602#ifdef RT_ARCH_AMD64
6603 /*
6604 * AMD64: eax = call status code.
6605 */
6606
6607 /* edx = rcPassUp */
6608 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6609# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6610 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6611# endif
6612
6613 /* edx = eax | rcPassUp */
6614 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6615 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6616 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6617 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6618
6619 /* Jump to non-zero status return path. */
6620 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6621
6622 /* done. */
6623
6624#elif RT_ARCH_ARM64
6625 /*
6626 * ARM64: w0 = call status code.
6627 */
6628# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6629 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6630# endif
6631 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6632
6633 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6634
6635 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6636
6637 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6638 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6639 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6640
6641#else
6642# error "port me"
6643#endif
6644 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6645 RT_NOREF_PV(idxInstr);
6646 return off;
6647}
6648
6649
6650/**
6651 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6652 * raising a \#GP(0) if it isn't.
6653 *
6654 * @returns New code buffer offset, UINT32_MAX on failure.
6655 * @param pReNative The native recompile state.
6656 * @param off The code buffer offset.
6657 * @param idxAddrReg The host register with the address to check.
6658 * @param idxInstr The current instruction.
6659 */
6660DECL_HIDDEN_THROW(uint32_t)
6661iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6662{
6663 /*
6664 * Make sure we don't have any outstanding guest register writes as we may
6665     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6666 */
6667 off = iemNativeRegFlushPendingWrites(pReNative, off);
6668
6669#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6670 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6671#else
6672 RT_NOREF(idxInstr);
6673#endif
6674
6675#ifdef RT_ARCH_AMD64
6676 /*
6677 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6678 * return raisexcpt();
6679     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6680 */
6681 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6682
6683 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6684 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6685 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6686 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6687 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6688
6689 iemNativeRegFreeTmp(pReNative, iTmpReg);
6690
6691#elif defined(RT_ARCH_ARM64)
6692 /*
6693 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6694 * return raisexcpt();
6695 * ----
6696 * mov x1, 0x800000000000
6697 * add x1, x0, x1
6698 * cmp xzr, x1, lsr 48
6699 * b.ne .Lraisexcpt
6700 */
6701 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6702
6703 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6704 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6705 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6706 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6707
6708 iemNativeRegFreeTmp(pReNative, iTmpReg);
6709
6710#else
6711# error "Port me"
6712#endif
6713 return off;
6714}
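/*
 * Added worked example (not part of the original source) for the AMD64 check
 * above; the 32-bit add wraps and zero-extends, so:
 *
 *   0x00007fffffffffff (canonical):     hi32=0x00007fff, +0x8000 = 0x0000ffff, >>16 = 0 -> ok
 *   0xffff800000000000 (canonical):     hi32=0xffff8000, +0x8000 = 0x00000000, >>16 = 0 -> ok
 *   0x0000800000000000 (non-canonical): hi32=0x00008000, +0x8000 = 0x00010000, >>16 = 1 -> #GP(0)
 */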
6715
6716
6717/**
6718 * Emits code to check that the content of @a idxAddrReg is within the limit
6719 * of CS, raising a \#GP(0) if it isn't.
6720 *
6721 * @returns New code buffer offset; throws VBox status code on error.
6722 * @param pReNative The native recompile state.
6723 * @param off The code buffer offset.
6724 * @param idxAddrReg The host register (32-bit) with the address to
6725 * check.
6726 * @param idxInstr The current instruction.
6727 */
6728DECL_HIDDEN_THROW(uint32_t)
6729iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6730 uint8_t idxAddrReg, uint8_t idxInstr)
6731{
6732 /*
6733 * Make sure we don't have any outstanding guest register writes as we may
6734     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6735 */
6736 off = iemNativeRegFlushPendingWrites(pReNative, off);
6737
6738#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6739 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6740#else
6741 RT_NOREF(idxInstr);
6742#endif
6743
6744 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6745 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6746 kIemNativeGstRegUse_ReadOnly);
6747
6748 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6749 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6750
6751 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6752 return off;
6753}
6754
6755
6756/**
6757 * Emits a call to a CImpl function or something similar.
6758 */
6759DECL_HIDDEN_THROW(uint32_t)
6760iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6761 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6762{
6763 /* Writeback everything. */
6764 off = iemNativeRegFlushPendingWrites(pReNative, off);
6765
6766 /*
6767     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6768 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6769 */
6770 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6771 fGstShwFlush
6772 | RT_BIT_64(kIemNativeGstReg_Pc)
6773 | RT_BIT_64(kIemNativeGstReg_EFlags));
6774 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6775
6776 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6777
6778 /*
6779 * Load the parameters.
6780 */
6781#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6782    /* Special-case the hidden VBOXSTRICTRC pointer. */
6783 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6784 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6785 if (cAddParams > 0)
6786 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6787 if (cAddParams > 1)
6788 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6789 if (cAddParams > 2)
6790 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6791 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6792
6793#else
6794 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6795 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6796 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6797 if (cAddParams > 0)
6798 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6799 if (cAddParams > 1)
6800 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6801 if (cAddParams > 2)
6802# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6803 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6804# else
6805 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6806# endif
6807#endif
6808
6809 /*
6810 * Make the call.
6811 */
6812 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6813
6814#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6815 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6816#endif
6817
6818 /*
6819 * Check the status code.
6820 */
6821 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6822}
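/*
 * Added usage sketch (hypothetical, not from the original source): a
 * defer-to-CImpl style emitter passing one extra parameter (cAddParams=1,
 * uParam0 carrying that argument) would typically forward to the helper above
 * along these lines; pfnSomeCImplWorker and fGstShwFlush are placeholders:
 *
 *   off = iemNativeEmitCImplCall(pReNative, off, idxInstr, fGstShwFlush,
 *                                (uintptr_t)pfnSomeCImplWorker, cbInstr,
 *                                1, uParam0, 0, 0);
 */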
6823
6824
6825/**
6826 * Emits a call to a threaded worker function.
6827 */
6828DECL_HIDDEN_THROW(uint32_t)
6829iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6830{
6831 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6832
6833 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6834 off = iemNativeRegFlushPendingWrites(pReNative, off);
6835
6836 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6837 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6838
6839#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6840 /* The threaded function may throw / long jmp, so set current instruction
6841 number if we're counting. */
6842 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6843#endif
6844
6845 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6846
6847#ifdef RT_ARCH_AMD64
6848 /* Load the parameters and emit the call. */
6849# ifdef RT_OS_WINDOWS
6850# ifndef VBOXSTRICTRC_STRICT_ENABLED
6851 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6852 if (cParams > 0)
6853 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6854 if (cParams > 1)
6855 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6856 if (cParams > 2)
6857 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6858# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6859 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6860 if (cParams > 0)
6861 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6862 if (cParams > 1)
6863 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6864 if (cParams > 2)
6865 {
6866 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6867 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6868 }
6869 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6870# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6871# else
6872 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6873 if (cParams > 0)
6874 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6875 if (cParams > 1)
6876 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6877 if (cParams > 2)
6878 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6879# endif
6880
6881 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6882
6883# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6884 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6885# endif
6886
6887#elif RT_ARCH_ARM64
6888 /*
6889 * ARM64:
6890 */
6891 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6892 if (cParams > 0)
6893 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6894 if (cParams > 1)
6895 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6896 if (cParams > 2)
6897 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6898
6899 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6900
6901#else
6902# error "port me"
6903#endif
6904
6905 /*
6906 * Check the status code.
6907 */
6908 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6909
6910 return off;
6911}
6912
6913#ifdef VBOX_WITH_STATISTICS
6914/**
6915 * Emits code to update the thread call statistics.
6916 */
6917DECL_INLINE_THROW(uint32_t)
6918iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6919{
6920 /*
6921 * Update threaded function stats.
6922 */
6923 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6924 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6925# if defined(RT_ARCH_ARM64)
6926 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6927 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6928 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6929 iemNativeRegFreeTmp(pReNative, idxTmp1);
6930 iemNativeRegFreeTmp(pReNative, idxTmp2);
6931# else
6932 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6933# endif
6934 return off;
6935}
6936#endif /* VBOX_WITH_STATISTICS */
6937
6938
6939/**
6940 * Emits the code at the ReturnWithFlags label (returns
6941 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6942 */
6943static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6944{
6945 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6946 if (idxLabel != UINT32_MAX)
6947 {
6948 iemNativeLabelDefine(pReNative, idxLabel, off);
6949
6950 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6951
6952 /* jump back to the return sequence. */
6953 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6954 }
6955 return off;
6956}
6957
6958
6959/**
6960 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6961 */
6962static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6963{
6964 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6965 if (idxLabel != UINT32_MAX)
6966 {
6967 iemNativeLabelDefine(pReNative, idxLabel, off);
6968
6969 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6970
6971 /* jump back to the return sequence. */
6972 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6973 }
6974 return off;
6975}
6976
6977
6978/**
6979 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6980 */
6981static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6982{
6983 /*
6984 * Generate the rc + rcPassUp fiddling code if needed.
6985 */
6986 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6987 if (idxLabel != UINT32_MAX)
6988 {
6989 iemNativeLabelDefine(pReNative, idxLabel, off);
6990
6991 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6992#ifdef RT_ARCH_AMD64
6993# ifdef RT_OS_WINDOWS
6994# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6995 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6996# endif
6997 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6999# else
7000 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7001 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7002# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7003 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7004# endif
7005# endif
7006# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7007 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7008# endif
7009
7010#else
7011 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7013 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7014#endif
7015
7016 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7017 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7018 }
7019 return off;
7020}
7021
7022
7023/**
7024 * Emits a standard epilog.
7025 */
7026static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7027{
7028 *pidxReturnLabel = UINT32_MAX;
7029
7030 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7031 off = iemNativeRegFlushPendingWrites(pReNative, off);
7032
7033 /*
7034 * Successful return, so clear the return register (eax, w0).
7035 */
7036 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
7037
7038 /*
7039 * Define label for common return point.
7040 */
7041 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7042 *pidxReturnLabel = idxReturn;
7043
7044 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7045
7046 /*
7047 * Restore registers and return.
7048 */
7049#ifdef RT_ARCH_AMD64
7050 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7051
7052 /* Reposition rsp at the r15 restore point. */
7053 pbCodeBuf[off++] = X86_OP_REX_W;
7054 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7055 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7056 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7057
7058 /* Pop non-volatile registers and return */
7059 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7060 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7061 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7062 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7063 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7064 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7065 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7066 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7067# ifdef RT_OS_WINDOWS
7068 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7069 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7070# endif
7071 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7072 pbCodeBuf[off++] = 0xc9; /* leave */
7073 pbCodeBuf[off++] = 0xc3; /* ret */
7074 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7075
7076#elif RT_ARCH_ARM64
7077 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7078
7079 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7080 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7081 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7082 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7083 IEMNATIVE_FRAME_VAR_SIZE / 8);
7084 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7085 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7086 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7087 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7088 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7089 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7090 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7091 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7092 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7093 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7094 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7095 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7096
7097 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7098 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7099 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7100 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7101
7102 /* retab / ret */
7103# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7104 if (1)
7105 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7106 else
7107# endif
7108 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7109
7110#else
7111# error "port me"
7112#endif
7113 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7114
7115 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7116}
7117
7118
7119/**
7120 * Emits a standard prolog.
7121 */
7122static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7123{
7124#ifdef RT_ARCH_AMD64
7125 /*
7126 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7127 * reserving 64 bytes for stack variables plus 4 non-register argument
7128 * slots. Fixed register assignment: xBX = pVCpu;
7129 *
7130 * Since we always do the same register spilling, we can use the same
7131 * unwind description for all the code.
7132 */
7133 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7134 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7135 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7136 pbCodeBuf[off++] = 0x8b;
7137 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7138 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7139 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7140# ifdef RT_OS_WINDOWS
7141 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7142 pbCodeBuf[off++] = 0x8b;
7143 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7144 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7145 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7146# else
7147 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7148 pbCodeBuf[off++] = 0x8b;
7149 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7150# endif
7151 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7152 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7153 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7154 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7155 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7156 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7157 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7158 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7159
7160# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7161 /* Save the frame pointer. */
7162 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7163# endif
7164
7165 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7166 X86_GREG_xSP,
7167 IEMNATIVE_FRAME_ALIGN_SIZE
7168 + IEMNATIVE_FRAME_VAR_SIZE
7169 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7170 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7171 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7172 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7173 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7174
7175#elif RT_ARCH_ARM64
7176 /*
7177 * We set up a stack frame exactly like on x86, only we have to push the
7178 * return address ourselves here. We save all non-volatile registers.
7179 */
7180 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7181
7182# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further; we've been unable
7183 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7184 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
7185 * in any way conditional, so just emit this instruction now and hope for the best... */
7186 /* pacibsp */
7187 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7188# endif
7189
7190 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7191 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7192 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7193 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7194 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7195 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7196 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7197 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7198 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7199 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7200 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7201 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7202 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7203 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7204 /* Save the BP and LR (ret address) registers at the top of the frame. */
7205 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7206 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7207 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7208 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7209 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7210 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7211
7212 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7213 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7214
7215 /* mov r28, r0 */
7216 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7217 /* mov r27, r1 */
7218 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7219
7220# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7221 /* Save the frame pointer. */
7222 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7223 ARMV8_A64_REG_X2);
7224# endif
7225
7226#else
7227# error "port me"
7228#endif
7229 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7230 return off;
7231}
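/* Resulting AMD64 frame sketch (illustration only; non-Windows layout shown, Windows additionally
   pushes rsi and rdi between rbx and r12):
        rbp+08h:        return address
        rbp+00h:        saved rbp
        rbp-08h:        saved rbx           (IEMNATIVE_REG_FIXED_PVMCPU)
        rbp-10h..-28h:  saved r12..r15
        below that:     alignment padding, the variable area and the stack/shadow argument
                        slots subtracted from rsp above; the epilog restores rsp with
                        'lea rsp, [rbp + IEMNATIVE_FP_OFF_LAST_PUSH]' before popping everything again. */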
7232
7233
7234/*********************************************************************************************************************************
7235* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7236*********************************************************************************************************************************/
7237
7238/**
7239 * Internal work that allocates a variable with kind set to
7240 * kIemNativeVarKind_Invalid and no current stack allocation.
7241 *
7242 * The kind will either be set by the caller or later when the variable is first
7243 * assigned a value.
7244 *
7245 * @returns Unpacked index.
7246 * @internal
7247 */
7248static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7249{
7250 Assert(cbType > 0 && cbType <= 64);
7251 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7252 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7253 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7254 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7255 pReNative->Core.aVars[idxVar].cbVar = cbType;
7256 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7257 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7258 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7259 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7260 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7261 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7262 pReNative->Core.aVars[idxVar].u.uValue = 0;
7263#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7264 pReNative->Core.aVars[idxVar].fSimdReg = false;
7265#endif
7266 return idxVar;
7267}
7268
7269
7270/**
7271 * Internal work that allocates an argument variable w/o setting enmKind.
7272 *
7273 * @returns Unpacked index.
7274 * @internal
7275 */
7276static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7277{
7278 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7279 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7280 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7281
7282 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7283 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7284 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7285 return idxVar;
7286}
7287
7288
7289/**
7290 * Gets the stack slot for a stack variable, allocating one if necessary.
7291 *
7292 * Calling this function implies that the stack slot will contain a valid
7293 * variable value. The caller deals with any register currently assigned to the
7294 * variable, typically by spilling it into the stack slot.
7295 *
7296 * @returns The stack slot number.
7297 * @param pReNative The recompiler state.
7298 * @param idxVar The variable.
7299 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7300 */
7301DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7302{
7303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7304 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7305 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7306
7307 /* Already got a slot? */
7308 uint8_t const idxStackSlot = pVar->idxStackSlot;
7309 if (idxStackSlot != UINT8_MAX)
7310 {
7311 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7312 return idxStackSlot;
7313 }
7314
7315 /*
7316 * A single slot is easy to allocate.
7317 * Allocate them from the top end, closest to BP, to reduce the displacement.
7318 */
7319 if (pVar->cbVar <= sizeof(uint64_t))
7320 {
7321 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7322 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7323 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7324 pVar->idxStackSlot = (uint8_t)iSlot;
7325 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7326 return (uint8_t)iSlot;
7327 }
7328
7329 /*
7330 * We need more than one stack slot.
7331 *
7332 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7333 */
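    /* Worked example (illustration only): for cbVar = 32 we get
          fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3 and
          fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1          = RT_BIT_32(4) - 1 = 0xf,
       i.e. four consecutive 8-byte slots starting on a four-slot boundary.  The loop below
       scans downwards from the highest free slot, rounds the candidate down to that
       alignment and accepts it only if all four bits are still clear in bmStack. */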
7334 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7335 Assert(pVar->cbVar <= 64);
7336 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7337 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7338 uint32_t bmStack = pReNative->Core.bmStack;
7339 while (bmStack != UINT32_MAX)
7340 {
7341 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7342 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7343 iSlot = (iSlot - 1) & ~fBitAlignMask;
7344 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7345 {
7346 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7347 pVar->idxStackSlot = (uint8_t)iSlot;
7348 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7349 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7350 return (uint8_t)iSlot;
7351 }
7352
7353 bmStack |= (fBitAllocMask << iSlot);
7354 }
7355 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7356}
7357
7358
7359/**
7360 * Changes the variable to a stack variable.
7361 *
7362 * Currently this is only possible to do the first time the variable is used;
7363 * switching later can be implemented but hasn't been done.
7364 *
7365 * @param pReNative The recompiler state.
7366 * @param idxVar The variable.
7367 * @throws VERR_IEM_VAR_IPE_2
7368 */
7369DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7370{
7371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7372 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7373 if (pVar->enmKind != kIemNativeVarKind_Stack)
7374 {
7375 /* We could in theory transition from immediate to stack as well, but it
7376 would involve the caller doing work storing the value on the stack. So,
7377 till that's required we only allow transition from invalid. */
7378 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7379 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7380 pVar->enmKind = kIemNativeVarKind_Stack;
7381
7382 /* Note! We don't allocate a stack slot here, that's only done when a
7383 slot is actually needed to hold a variable value. */
7384 }
7385}
7386
7387
7388/**
7389 * Sets the variable to a constant (immediate) value.
7390 *
7391 * This does not require stack storage as we know the value and can always
7392 * reload it, unless of course it's referenced.
7393 *
7394 * @param pReNative The recompiler state.
7395 * @param idxVar The variable.
7396 * @param uValue The immediate value.
7397 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7398 */
7399DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7400{
7401 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7402 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7403 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7404 {
7405 /* Only simple transitions for now. */
7406 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7407 pVar->enmKind = kIemNativeVarKind_Immediate;
7408 }
7409 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7410
7411 pVar->u.uValue = uValue;
7412 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7413 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7414 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7415}
7416
7417
7418/**
7419 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7420 *
7421 * This does not require stack storage as we know the value and can always
7422 * reload it. Loading is postponed till needed.
7423 *
7424 * @param pReNative The recompiler state.
7425 * @param idxVar The variable. Unpacked.
7426 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7427 *
7428 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7429 * @internal
7430 */
7431static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7432{
7433 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7434 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7435
7436 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7437 {
7438 /* Only simple transitions for now. */
7439 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7440 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7441 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7442 }
7443 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7444
7445 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7446
7447 /* Update the other variable, ensure it's a stack variable. */
7448 /** @todo handle variables with const values... that'll go boom now. */
7449 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7450 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7451}
7452
7453
7454/**
7455 * Sets the variable to a reference (pointer) to a guest register reference.
7456 *
7457 * This does not require stack storage as we know the value and can always
7458 * reload it. Loading is postponed till needed.
7459 *
7460 * @param pReNative The recompiler state.
7461 * @param idxVar The variable.
7462 * @param enmRegClass The class of guest registers to reference.
7463 * @param idxReg The register within @a enmRegClass to reference.
7464 *
7465 * @throws VERR_IEM_VAR_IPE_2
7466 */
7467DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7468 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7469{
7470 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7471 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7472
7473 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7474 {
7475 /* Only simple transitions for now. */
7476 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7477 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7478 }
7479 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7480
7481 pVar->u.GstRegRef.enmClass = enmRegClass;
7482 pVar->u.GstRegRef.idx = idxReg;
7483}
7484
7485
7486DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7487{
7488 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7489}
7490
7491
7492DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7493{
7494 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7495
7496 /* Since we're using a generic uint64_t value type, we must truncate it if
7497 the variable is smaller, otherwise we may end up with a too large value when
7498 scaling up an imm8 w/ sign-extension.
7499
7500 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7501 in the BIOS, bx=1) when running on arm, because clang expects 16-bit
7502 register parameters to have bits 16 and up set to zero. Instead of
7503 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7504 CF value in the result. */
7505 switch (cbType)
7506 {
7507 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7508 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7509 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7510 }
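    /* Example (illustration only): a sign-extended imm8 of -1 destined for a 16-bit argument
       arrives here as UINT64_C(0xffffffffffffffff); the masking above turns it into 0xffff,
       so the host register ends up with the expected value in bits 15:0 and zero above. */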
7511 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7512 return idxVar;
7513}
7514
7515
7516DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7517{
7518 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7519 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7520 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7521 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7522 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7523 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7524
7525 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7526 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7527 return idxArgVar;
7528}
7529
7530
7531DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7532{
7533 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7534 /* Don't set it to stack now; leave that to the first use since, for instance,
7535 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7536 return idxVar;
7537}
7538
7539
7540DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7541{
7542 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7543
7544 /* Since we're using a generic uint64_t value type, we must truncate it if
7545 the variable is smaller, otherwise we may end up with a too large value when
7546 scaling up an imm8 w/ sign-extension. */
7547 switch (cbType)
7548 {
7549 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7550 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7551 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7552 }
7553 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7554 return idxVar;
7555}
7556
7557
7558/**
7559 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7560 * fixed till we call iemNativeVarRegisterRelease.
7561 *
7562 * @returns The host register number.
7563 * @param pReNative The recompiler state.
7564 * @param idxVar The variable.
7565 * @param poff Pointer to the instruction buffer offset.
7566 * In case a register needs to be freed up or the value
7567 * loaded off the stack.
7568 * @param fInitialized Set if the variable must already have been initialized.
7569 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7570 * the case.
7571 * @param idxRegPref Preferred register number or UINT8_MAX.
7572 */
7573DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7574 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7575{
7576 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7577 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7578 Assert(pVar->cbVar <= 8);
7579 Assert(!pVar->fRegAcquired);
7580
7581 uint8_t idxReg = pVar->idxReg;
7582 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7583 {
7584 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7585 && pVar->enmKind < kIemNativeVarKind_End);
7586 pVar->fRegAcquired = true;
7587 return idxReg;
7588 }
7589
7590 /*
7591 * If the kind of variable has not yet been set, default to 'stack'.
7592 */
7593 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7594 && pVar->enmKind < kIemNativeVarKind_End);
7595 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7596 iemNativeVarSetKindToStack(pReNative, idxVar);
7597
7598 /*
7599 * We have to allocate a register for the variable, even if it's a stack one,
7600 * as we don't know whether modifications are being made to it before it is
7601 * finalized (todo: analyze and insert hints about that?).
7602 *
7603 * If we can, we try to get the correct register for argument variables. This
7604 * assumes that most argument variables are fetched as close as possible
7605 * to the actual call, so that there aren't any interfering hidden calls
7606 * (memory accesses, etc.) in between.
7607 *
7608 * If we cannot, or if it's a regular variable, we make sure no argument
7609 * registers that will be used by this MC block are allocated here, and we
7610 * always prefer non-volatile registers to avoid needing to spill stuff for
7611 * internal calls.
7612 */
7613 /** @todo Detect too early argument value fetches and warn about hidden
7614 * calls causing less optimal code to be generated in the python script. */
7615
7616 uint8_t const uArgNo = pVar->uArgNo;
7617 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7618 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7619 {
7620 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7621 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7622 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7623 }
7624 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7625 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7626 {
7627 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7628 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7629 & ~pReNative->Core.bmHstRegsWithGstShadow
7630 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7631 & fNotArgsMask;
7632 if (fRegs)
7633 {
7634 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7635 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7636 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7637 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7638 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7639 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7640 }
7641 else
7642 {
7643 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7644 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7645 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7646 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7647 }
7648 }
7649 else
7650 {
7651 idxReg = idxRegPref;
7652 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7653 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7654 }
7655 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7656 pVar->idxReg = idxReg;
7657
7658#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7659 pVar->fSimdReg = false;
7660#endif
7661
7662 /*
7663 * Load it off the stack if we've got a stack slot.
7664 */
7665 uint8_t const idxStackSlot = pVar->idxStackSlot;
7666 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7667 {
7668 Assert(fInitialized);
7669 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7670 switch (pVar->cbVar)
7671 {
7672 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7673 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7674 case 3: AssertFailed(); RT_FALL_THRU();
7675 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7676 default: AssertFailed(); RT_FALL_THRU();
7677 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7678 }
7679 }
7680 else
7681 {
7682 Assert(idxStackSlot == UINT8_MAX);
7683 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7684 }
7685 pVar->fRegAcquired = true;
7686 return idxReg;
7687}
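#if 0 /* Usage sketch (illustration only, not compiled): the acquire/release pair pins the
         variable in a host register so the code emitted in between can use it directly. */
    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    /* ... emit instructions that read and/or update idxVarReg here ... */
    iemNativeVarRegisterRelease(pReNative, idxVar);
#endif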
7688
7689
7690#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7691/**
7692 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7693 * fixed till we call iemNativeVarRegisterRelease.
7694 *
7695 * @returns The host register number.
7696 * @param pReNative The recompiler state.
7697 * @param idxVar The variable.
7698 * @param poff Pointer to the instruction buffer offset.
7699 * In case a register needs to be freed up or the value
7700 * loaded off the stack.
7701 * @param fInitialized Set if the variable must already have been initialized.
7702 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7703 * the case.
7704 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7705 */
7706DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7707 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7708{
7709 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7710 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7711 Assert( pVar->cbVar == sizeof(RTUINT128U)
7712 || pVar->cbVar == sizeof(RTUINT256U));
7713 Assert(!pVar->fRegAcquired);
7714
7715 uint8_t idxReg = pVar->idxReg;
7716 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7717 {
7718 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7719 && pVar->enmKind < kIemNativeVarKind_End);
7720 pVar->fRegAcquired = true;
7721 return idxReg;
7722 }
7723
7724 /*
7725 * If the kind of variable has not yet been set, default to 'stack'.
7726 */
7727 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7728 && pVar->enmKind < kIemNativeVarKind_End);
7729 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7730 iemNativeVarSetKindToStack(pReNative, idxVar);
7731
7732 /*
7733 * We have to allocate a register for the variable, even if it's a stack one,
7734 * as we don't know whether modifications are being made to it before it is
7735 * finalized (todo: analyze and insert hints about that?).
7736 *
7737 * If we can, we try to get the correct register for argument variables. This
7738 * assumes that most argument variables are fetched as close as possible
7739 * to the actual call, so that there aren't any interfering hidden calls
7740 * (memory accesses, etc.) in between.
7741 *
7742 * If we cannot, or if it's a regular variable, we make sure no argument
7743 * registers that will be used by this MC block are allocated here, and we
7744 * always prefer non-volatile registers to avoid needing to spill stuff for
7745 * internal calls.
7746 */
7747 /** @todo Detect too early argument value fetches and warn about hidden
7748 * calls causing less optimal code to be generated in the python script. */
7749
7750 uint8_t const uArgNo = pVar->uArgNo;
7751 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7752
7753 /* SIMD is a bit simpler for now because there is no support for arguments. */
7754 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7755 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7756 {
7757 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7758 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7759 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7760 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7761 & fNotArgsMask;
7762 if (fRegs)
7763 {
7764 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7765 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7766 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7767 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7768 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7769 }
7770 else
7771 {
7772 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7773 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7774 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7775 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7776 }
7777 }
7778 else
7779 {
7780 idxReg = idxRegPref;
7781 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7782 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7783 }
7784 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7785
7786 pVar->fSimdReg = true;
7787 pVar->idxReg = idxReg;
7788
7789 /*
7790 * Load it off the stack if we've got a stack slot.
7791 */
7792 uint8_t const idxStackSlot = pVar->idxStackSlot;
7793 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7794 {
7795 Assert(fInitialized);
7796 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7797 switch (pVar->cbVar)
7798 {
7799 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7800 default: AssertFailed(); RT_FALL_THRU();
7801 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7802 }
7803 }
7804 else
7805 {
7806 Assert(idxStackSlot == UINT8_MAX);
7807 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7808 }
7809 pVar->fRegAcquired = true;
7810 return idxReg;
7811}
7812#endif
7813
7814
7815/**
7816 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7817 * guest register.
7818 *
7819 * This function makes sure there is a register for it and sets it to be the
7820 * current shadow copy of @a enmGstReg.
7821 *
7822 * @returns The host register number.
7823 * @param pReNative The recompiler state.
7824 * @param idxVar The variable.
7825 * @param enmGstReg The guest register this variable will be written to
7826 * after this call.
7827 * @param poff Pointer to the instruction buffer offset.
7828 * In case a register needs to be freed up or if the
7829 * variable content needs to be loaded off the stack.
7830 *
7831 * @note We DO NOT expect @a idxVar to be an argument variable,
7832 * because this function is only used in the commit stage of an
7833 * instruction.
7834 */
7835DECL_HIDDEN_THROW(uint8_t)
7836iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7837{
7838 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7839 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7840 Assert(!pVar->fRegAcquired);
7841 AssertMsgStmt( pVar->cbVar <= 8
7842 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7843 || pVar->enmKind == kIemNativeVarKind_Stack),
7844 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7845 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7846 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7847
7848 /*
7849 * This shouldn't ever be used for arguments, unless it's in a weird else
7850 * branch that doesn't do any calling and even then it's questionable.
7851 *
7852 * However, in case someone writes crazy wrong MC code and does register
7853 * updates before making calls, just use the regular register allocator to
7854 * ensure we get a register suitable for the intended argument number.
7855 */
7856 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7857
7858 /*
7859 * If there is already a register for the variable, we transfer/set the
7860 * guest shadow copy assignment to it.
7861 */
7862 uint8_t idxReg = pVar->idxReg;
7863 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7864 {
7865 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7866 {
7867 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7868 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7869 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7870 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7871 }
7872 else
7873 {
7874 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7875 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7876 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7877 }
7878 /** @todo figure this one out. We need some way of making sure the register isn't
7879 * modified after this point, just in case we start writing crappy MC code. */
7880 pVar->enmGstReg = enmGstReg;
7881 pVar->fRegAcquired = true;
7882 return idxReg;
7883 }
7884 Assert(pVar->uArgNo == UINT8_MAX);
7885
7886 /*
7887 * Because this is supposed to be the commit stage, we just tag along with the
7888 * temporary register allocator and upgrade it to a variable register.
7889 */
7890 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7891 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7892 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7893 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7894 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7895 pVar->idxReg = idxReg;
7896
7897 /*
7898 * Now we need to load the register value.
7899 */
7900 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7901 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7902 else
7903 {
7904 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7905 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7906 switch (pVar->cbVar)
7907 {
7908 case sizeof(uint64_t):
7909 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7910 break;
7911 case sizeof(uint32_t):
7912 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7913 break;
7914 case sizeof(uint16_t):
7915 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7916 break;
7917 case sizeof(uint8_t):
7918 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7919 break;
7920 default:
7921 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7922 }
7923 }
7924
7925 pVar->fRegAcquired = true;
7926 return idxReg;
7927}
7928
7929
7930/**
7931 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7932 *
7933 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7934 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7935 * requirement of flushing anything in volatile host registers when making a
7936 * call.
7937 *
7938 * @returns New @a off value.
7939 * @param pReNative The recompiler state.
7940 * @param off The code buffer position.
7941 * @param fHstRegsNotToSave Set of registers not to save & restore.
7942 */
7943DECL_HIDDEN_THROW(uint32_t)
7944iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7945{
7946 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7947 if (fHstRegs)
7948 {
7949 do
7950 {
7951 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7952 fHstRegs &= ~RT_BIT_32(idxHstReg);
7953
7954 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7955 {
7956 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7958 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7959 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7960 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7961 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7962 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7963 {
7964 case kIemNativeVarKind_Stack:
7965 {
7966 /* Temporarily spill the variable register. */
7967 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7968 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7969 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7970 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7971 continue;
7972 }
7973
7974 case kIemNativeVarKind_Immediate:
7975 case kIemNativeVarKind_VarRef:
7976 case kIemNativeVarKind_GstRegRef:
7977 /* It is weird to have any of these loaded at this point. */
7978 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7979 continue;
7980
7981 case kIemNativeVarKind_End:
7982 case kIemNativeVarKind_Invalid:
7983 break;
7984 }
7985 AssertFailed();
7986 }
7987 else
7988 {
7989 /*
7990 * Allocate a temporary stack slot and spill the register to it.
7991 */
7992 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7993 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7994 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7995 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7996 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7997 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7998 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7999 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8000 }
8001 } while (fHstRegs);
8002 }
8003#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8004 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8005 if (fHstRegs)
8006 {
8007 do
8008 {
8009 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8010 fHstRegs &= ~RT_BIT_32(idxHstReg);
8011
8012 /*
8013 * Guest registers are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
8014 * which would be more difficult anyway as they span multiple stack slots and come in different sizes
8015 * (besides, we only have a limited number of slots at the moment). Fixed temporary registers
8016 * don't need saving.
8017 */
8018 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8019 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8020 continue;
8021
8022 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8023
8024 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8026 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8027 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8028 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8029 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8030 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8031 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8032 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8033 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8034 {
8035 case kIemNativeVarKind_Stack:
8036 {
8037 /* Temporarily spill the variable register. */
8038 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8039 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8040 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8041 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8042 if (cbVar == sizeof(RTUINT128U))
8043 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8044 else
8045 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8046 continue;
8047 }
8048
8049 case kIemNativeVarKind_Immediate:
8050 case kIemNativeVarKind_VarRef:
8051 case kIemNativeVarKind_GstRegRef:
8052 /* It is weird to have any of these loaded at this point. */
8053 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8054 continue;
8055
8056 case kIemNativeVarKind_End:
8057 case kIemNativeVarKind_Invalid:
8058 break;
8059 }
8060 AssertFailed();
8061 } while (fHstRegs);
8062 }
8063#endif
8064 return off;
8065}
8066
8067
8068/**
8069 * Emit code to restore volatile registers after a call to a helper.
8070 *
8071 * @returns New @a off value.
8072 * @param pReNative The recompiler state.
8073 * @param off The code buffer position.
8074 * @param fHstRegsNotToSave Set of registers not to save & restore.
8075 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8076 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8077 */
8078DECL_HIDDEN_THROW(uint32_t)
8079iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8080{
8081 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8082 if (fHstRegs)
8083 {
8084 do
8085 {
8086 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8087 fHstRegs &= ~RT_BIT_32(idxHstReg);
8088
8089 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8090 {
8091 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8092 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8093 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8094 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8095 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8096 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8097 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8098 {
8099 case kIemNativeVarKind_Stack:
8100 {
8101 /* Unspill the variable register. */
8102 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8103 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8104 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8105 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8106 continue;
8107 }
8108
8109 case kIemNativeVarKind_Immediate:
8110 case kIemNativeVarKind_VarRef:
8111 case kIemNativeVarKind_GstRegRef:
8112 /* It is weird to have any of these loaded at this point. */
8113 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8114 continue;
8115
8116 case kIemNativeVarKind_End:
8117 case kIemNativeVarKind_Invalid:
8118 break;
8119 }
8120 AssertFailed();
8121 }
8122 else
8123 {
8124 /*
8125 * Restore from temporary stack slot.
8126 */
8127 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8128 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8129 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8130 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8131
8132 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8133 }
8134 } while (fHstRegs);
8135 }
8136#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8137 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8138 if (fHstRegs)
8139 {
8140 do
8141 {
8142 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8143 fHstRegs &= ~RT_BIT_32(idxHstReg);
8144
8145 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8146 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8147 continue;
8148 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8149
8150 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8151 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8152 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8153 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8154 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8155 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8156 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8157 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8158 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8159 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8160 {
8161 case kIemNativeVarKind_Stack:
8162 {
8163 /* Unspill the variable register. */
8164 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8165 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8166 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8167 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8168
8169 if (cbVar == sizeof(RTUINT128U))
8170 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8171 else
8172 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8173 continue;
8174 }
8175
8176 case kIemNativeVarKind_Immediate:
8177 case kIemNativeVarKind_VarRef:
8178 case kIemNativeVarKind_GstRegRef:
8179 /* It is weird to have any of these loaded at this point. */
8180 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8181 continue;
8182
8183 case kIemNativeVarKind_End:
8184 case kIemNativeVarKind_Invalid:
8185 break;
8186 }
8187 AssertFailed();
8188 } while (fHstRegs);
8189 }
8190#endif
8191 return off;
8192}
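#if 0 /* Usage sketch (illustration only, not compiled): the save/restore pair is meant to
         bracket a helper call so live volatile registers survive it without a full flush.
         The helper pointer below is a hypothetical placeholder. */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeTlbMissHelper /* hypothetical */);
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
#endif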
8193
8194
8195/**
8196 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8197 *
8198 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8199 *
8200 * ASSUMES that @a idxVar is valid and unpacked.
8201 */
8202DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8203{
8204 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8205 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8206 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8207 {
8208 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8209 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8210 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8211 Assert(cSlots > 0);
8212 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8213 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8214 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8215 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8216 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8217 }
8218 else
8219 Assert(idxStackSlot == UINT8_MAX);
8220}
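/* Example (illustration only): cbVar = 32 gives cSlots = 4 and fAllocMask = 0xf, clearing
   exactly the four bits that iemNativeVarGetStackSlot set for the variable. */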
8221
8222
8223/**
8224 * Worker that frees a single variable.
8225 *
8226 * ASSUMES that @a idxVar is valid and unpacked.
8227 */
8228DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8229{
8230 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8231 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8232 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8233
8234 /* Free the host register first if any assigned. */
8235 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8236#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8237 if ( idxHstReg != UINT8_MAX
8238 && pReNative->Core.aVars[idxVar].fSimdReg)
8239 {
8240 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8241 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8242 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8243 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8244 }
8245 else
8246#endif
8247 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8248 {
8249 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8250 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8251 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8252 }
8253
8254 /* Free argument mapping. */
8255 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8256 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8257 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8258
8259 /* Free the stack slots. */
8260 iemNativeVarFreeStackSlots(pReNative, idxVar);
8261
8262 /* Free the actual variable. */
8263 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8264 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8265}
8266
8267
8268/**
8269 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8270 */
8271DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8272{
8273 while (bmVars != 0)
8274 {
8275 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8276 bmVars &= ~RT_BIT_32(idxVar);
8277
8278#if 1 /** @todo optimize by simplifying this later... */
8279 iemNativeVarFreeOneWorker(pReNative, idxVar);
8280#else
8281 /* Only need to free the host register, the rest is done as bulk updates below. */
8282 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8283 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8284 {
8285 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8286 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8287 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8288 }
8289#endif
8290 }
8291#if 0 /** @todo optimize by simplifying this later... */
8292 pReNative->Core.bmVars = 0;
8293 pReNative->Core.bmStack = 0;
8294 pReNative->Core.u64ArgVars = UINT64_MAX;
8295#endif
8296}
8297
8298
8299
8300/*********************************************************************************************************************************
8301* Emitters for IEM_MC_CALL_CIMPL_XXX *
8302*********************************************************************************************************************************/
8303
8304/**
8305 * Emits code to load a reference to the given guest register into @a idxGprDst.
8306 */
8307DECL_HIDDEN_THROW(uint32_t)
8308iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8309 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8310{
8311#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8312 /** @todo If we're ever going to allow referencing the RIP register we need to update the guest value here. */
8313#endif
8314
8315 /*
8316 * Get the offset relative to the CPUMCTX structure.
8317 */
8318 uint32_t offCpumCtx;
8319 switch (enmClass)
8320 {
8321 case kIemNativeGstRegRef_Gpr:
8322 Assert(idxRegInClass < 16);
8323 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8324 break;
8325
8326 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8327 Assert(idxRegInClass < 4);
8328 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8329 break;
8330
8331 case kIemNativeGstRegRef_EFlags:
8332 Assert(idxRegInClass == 0);
8333 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8334 break;
8335
8336 case kIemNativeGstRegRef_MxCsr:
8337 Assert(idxRegInClass == 0);
8338 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8339 break;
8340
8341 case kIemNativeGstRegRef_FpuReg:
8342 Assert(idxRegInClass < 8);
8343 AssertFailed(); /** @todo what kind of indexing? */
8344 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8345 break;
8346
8347 case kIemNativeGstRegRef_MReg:
8348 Assert(idxRegInClass < 8);
8349 AssertFailed(); /** @todo what kind of indexing? */
8350 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8351 break;
8352
8353 case kIemNativeGstRegRef_XReg:
8354 Assert(idxRegInClass < 16);
8355 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8356 break;
8357
8358 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8359 Assert(idxRegInClass == 0);
8360 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8361 break;
8362
8363 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8364 Assert(idxRegInClass == 0);
8365 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8366 break;
8367
8368 default:
8369 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8370 }
8371
8372 /*
8373 * Load the value into the destination register.
8374 */
8375#ifdef RT_ARCH_AMD64
8376 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8377
8378#elif defined(RT_ARCH_ARM64)
8379 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8380 Assert(offCpumCtx < 4096);
8381 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8382
8383#else
8384# error "Port me!"
8385#endif
8386
8387 return off;
8388}
8389
8390
8391/**
8392 * Common code for CIMPL and AIMPL calls.
8393 *
8394 * These are calls that use argument variables and such.  They should not be
8395 * confused with internal calls required to implement an MC operation,
8396 * like a TLB load and similar.
8397 *
8398 * Upon return all that is left to do is to load any hidden arguments and
8399 * perform the call. All argument variables are freed.
8400 *
8401 * @returns New code buffer offset; throws VBox status code on error.
8402 * @param pReNative The native recompile state.
8403 * @param off The code buffer offset.
8404 * @param       cArgs           The total number of arguments (includes hidden
8405 * count).
8406 * @param cHiddenArgs The number of hidden arguments. The hidden
8407 * arguments must not have any variable declared for
8408 * them, whereas all the regular arguments must
8409 * (tstIEMCheckMc ensures this).
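 *
 * A rough usage sketch (illustrative only; the exact emitter helpers and the
 * hidden arguments loaded afterwards depend on the call type):
 * @code
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs + cHiddenArgs, cHiddenArgs);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 * @endcode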
8410 */
8411DECL_HIDDEN_THROW(uint32_t)
8412iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8413{
8414#ifdef VBOX_STRICT
8415 /*
8416 * Assert sanity.
8417 */
8418 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8419 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8420 for (unsigned i = 0; i < cHiddenArgs; i++)
8421 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8422 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8423 {
8424 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8425 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8426 }
8427 iemNativeRegAssertSanity(pReNative);
8428#endif
8429
8430 /* We don't know what the called function makes use of, so flush any pending register writes. */
8431 off = iemNativeRegFlushPendingWrites(pReNative, off);
8432
8433 /*
8434 * Before we do anything else, go over variables that are referenced and
8435 * make sure they are not in a register.
8436 */
8437 uint32_t bmVars = pReNative->Core.bmVars;
8438 if (bmVars)
8439 {
8440 do
8441 {
8442 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8443 bmVars &= ~RT_BIT_32(idxVar);
8444
8445 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8446 {
8447 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8448#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8449 if ( idxRegOld != UINT8_MAX
8450 && pReNative->Core.aVars[idxVar].fSimdReg)
8451 {
8452 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8453 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8454
8455 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8456 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8457 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8458 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8459 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8460 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8461 else
8462 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8463
8464 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8465 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8466
8467 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8468 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8469 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8470 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8471 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8472 }
8473 else
8474#endif
8475 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8476 {
8477 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8478 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8479 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8480 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8481 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8482
8483 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8484 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8485 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8486 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8487 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8488 }
8489 }
8490 } while (bmVars != 0);
8491#if 0 //def VBOX_STRICT
8492 iemNativeRegAssertSanity(pReNative);
8493#endif
8494 }
8495
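    /* Number of arguments passed in registers; any remainder goes on the stack
       (IEMNATIVE_FP_OFF_STACK_ARG0 builds only, see below). */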
8496 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8497
8498 /*
8499 * First, go over the host registers that will be used for arguments and make
8500 * sure they either hold the desired argument or are free.
8501 */
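    /* g_afIemNativeCallRegs[cRegArgs] is the mask of the first cRegArgs argument
       registers; nothing to do here unless one of them is currently allocated. */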
8502 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8503 {
8504 for (uint32_t i = 0; i < cRegArgs; i++)
8505 {
8506 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8507 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8508 {
8509 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8510 {
8511 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8512 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8513 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8514 Assert(pVar->idxReg == idxArgReg);
8515 uint8_t const uArgNo = pVar->uArgNo;
8516 if (uArgNo == i)
8517                    { /* perfect */ }
8518 /* The variable allocator logic should make sure this is impossible,
8519 except for when the return register is used as a parameter (ARM,
8520 but not x86). */
8521#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8522 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8523 {
8524# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8525# error "Implement this"
8526# endif
8527 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8528 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8529 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8530 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8531 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8532 }
8533#endif
8534 else
8535 {
8536 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8537
8538 if (pVar->enmKind == kIemNativeVarKind_Stack)
8539 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8540 else
8541 {
8542 /* just free it, can be reloaded if used again */
8543 pVar->idxReg = UINT8_MAX;
8544 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8545 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8546 }
8547 }
8548 }
8549 else
8550 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8551 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8552 }
8553 }
8554#if 0 //def VBOX_STRICT
8555 iemNativeRegAssertSanity(pReNative);
8556#endif
8557 }
8558
8559 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8560
8561#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8562 /*
8563 * If there are any stack arguments, make sure they are in their place as well.
8564 *
8565     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8566     * the caller) will be loading it later and it must be free (see first loop).
8567 */
8568 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8569 {
8570 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8571 {
8572 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8573 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8574 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8575 {
8576 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8577 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8578 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8579 pVar->idxReg = UINT8_MAX;
8580 }
8581 else
8582 {
8583 /* Use ARG0 as temp for stuff we need registers for. */
8584 switch (pVar->enmKind)
8585 {
8586 case kIemNativeVarKind_Stack:
8587 {
8588 uint8_t const idxStackSlot = pVar->idxStackSlot;
8589 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8590 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8591 iemNativeStackCalcBpDisp(idxStackSlot));
8592 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8593 continue;
8594 }
8595
8596 case kIemNativeVarKind_Immediate:
8597 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8598 continue;
8599
8600 case kIemNativeVarKind_VarRef:
8601 {
8602 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8603 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8604 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8605 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8606 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8607# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8608 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8609 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8610 if ( fSimdReg
8611 && idxRegOther != UINT8_MAX)
8612 {
8613 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8614 if (cbVar == sizeof(RTUINT128U))
8615 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8616 else
8617 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8618 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8619 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8620 }
8621 else
8622# endif
8623 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8624 {
8625 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8626 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8627 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8628 }
8629 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8630 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8631 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8632 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8633 continue;
8634 }
8635
8636 case kIemNativeVarKind_GstRegRef:
8637 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8638 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8639 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8640 continue;
8641
8642 case kIemNativeVarKind_Invalid:
8643 case kIemNativeVarKind_End:
8644 break;
8645 }
8646 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8647 }
8648 }
8649# if 0 //def VBOX_STRICT
8650 iemNativeRegAssertSanity(pReNative);
8651# endif
8652 }
8653#else
8654 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8655#endif
8656
8657 /*
8658 * Make sure the argument variables are loaded into their respective registers.
8659 *
8660 * We can optimize this by ASSUMING that any register allocations are for
8661     * registers that have already been loaded and are ready.  The previous step
8662 * saw to that.
8663 */
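    /* Only bother when at least one non-hidden argument register is still unallocated,
       i.e. does not already hold its (loaded) argument variable. */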
8664 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8665 {
8666 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8667 {
8668 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8669 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8670 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8671 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8672 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8673 else
8674 {
8675 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8676 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8677 {
8678 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8679 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8680 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8681 | RT_BIT_32(idxArgReg);
8682 pVar->idxReg = idxArgReg;
8683 }
8684 else
8685 {
8686 /* Use ARG0 as temp for stuff we need registers for. */
8687 switch (pVar->enmKind)
8688 {
8689 case kIemNativeVarKind_Stack:
8690 {
8691 uint8_t const idxStackSlot = pVar->idxStackSlot;
8692 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8693 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8694 continue;
8695 }
8696
8697 case kIemNativeVarKind_Immediate:
8698 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8699 continue;
8700
8701 case kIemNativeVarKind_VarRef:
8702 {
8703 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8704 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8705 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8706 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8707 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8708 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8709#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8710 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8711 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8712 if ( fSimdReg
8713 && idxRegOther != UINT8_MAX)
8714 {
8715 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8716 if (cbVar == sizeof(RTUINT128U))
8717 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8718 else
8719 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8720 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8721 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8722 }
8723 else
8724#endif
8725 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8726 {
8727 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8728 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8729 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8730 }
8731 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8732 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8733 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8734 continue;
8735 }
8736
8737 case kIemNativeVarKind_GstRegRef:
8738 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8739 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8740 continue;
8741
8742 case kIemNativeVarKind_Invalid:
8743 case kIemNativeVarKind_End:
8744 break;
8745 }
8746 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8747 }
8748 }
8749 }
8750#if 0 //def VBOX_STRICT
8751 iemNativeRegAssertSanity(pReNative);
8752#endif
8753 }
8754#ifdef VBOX_STRICT
8755 else
8756 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8757 {
8758 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8759 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8760 }
8761#endif
8762
8763 /*
8764 * Free all argument variables (simplified).
8765 * Their lifetime always expires with the call they are for.
8766 */
8767 /** @todo Make the python script check that arguments aren't used after
8768 * IEM_MC_CALL_XXXX. */
8769    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8770     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8771     *        an argument value.  There is also some FPU stuff. */
8772 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8773 {
8774 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8775 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8776
8777 /* no need to free registers: */
8778 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8779 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8780 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8781 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8782 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8783 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8784
8785 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8786 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8787 iemNativeVarFreeStackSlots(pReNative, idxVar);
8788 }
8789 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8790
8791 /*
8792 * Flush volatile registers as we make the call.
8793 */
8794 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8795
8796 return off;
8797}
8798
8799
8800
8801/*********************************************************************************************************************************
8802* TLB Lookup. *
8803*********************************************************************************************************************************/
8804
8805/**
8806 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8807 */
8808DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8809{
8810 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8811 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8812 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8813 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8814
8815 /* Do the lookup manually. */
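    /* Note: iSegReg == UINT8_MAX means the caller already supplied a flat address. */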
8816 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8817 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8818 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8819 if (RT_LIKELY(pTlbe->uTag == uTag))
8820 {
8821 /*
8822 * Check TLB page table level access flags.
8823 */
8824 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
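        /* (IEM_GET_CPL(pVCpu) + 1) & 4 is only non-zero for CPL 3, so the no-user
           page bit is only enforced while the guest is executing user-mode code. */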
8825 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8826 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8827 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8828 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8829 | IEMTLBE_F_PG_UNASSIGNED
8830 | IEMTLBE_F_PT_NO_ACCESSED
8831 | fNoWriteNoDirty | fNoUser);
8832 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8833 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8834 {
8835 /*
8836 * Return the address.
8837 */
8838 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8839 if ((uintptr_t)pbAddr == uResult)
8840 return;
8841 RT_NOREF(cbMem);
8842 AssertFailed();
8843 }
8844 else
8845 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8846 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8847 }
8848 else
8849 AssertFailed();
8850 RT_BREAKPOINT();
8851}
8852
8853/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8854
8855
8856
8857/*********************************************************************************************************************************
8858* Recompiler Core. *
8859*********************************************************************************************************************************/
8860
8861/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8862static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8863{
8864 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8865 pDis->cbCachedInstr += cbMaxRead;
8866 RT_NOREF(cbMinRead);
8867 return VERR_NO_DATA;
8868}
8869
8870
8871DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8872{
8873 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8874 {
8875#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8876 ENTRY(fLocalForcedActions),
8877 ENTRY(iem.s.rcPassUp),
8878 ENTRY(iem.s.fExec),
8879 ENTRY(iem.s.pbInstrBuf),
8880 ENTRY(iem.s.uInstrBufPc),
8881 ENTRY(iem.s.GCPhysInstrBuf),
8882 ENTRY(iem.s.cbInstrBufTotal),
8883 ENTRY(iem.s.idxTbCurInstr),
8884#ifdef VBOX_WITH_STATISTICS
8885 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8886 ENTRY(iem.s.StatNativeTlbHitsForStore),
8887 ENTRY(iem.s.StatNativeTlbHitsForStack),
8888 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8889 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8890 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8891 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8892 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8893#endif
8894 ENTRY(iem.s.DataTlb.aEntries),
8895 ENTRY(iem.s.DataTlb.uTlbRevision),
8896 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8897 ENTRY(iem.s.DataTlb.cTlbHits),
8898 ENTRY(iem.s.CodeTlb.aEntries),
8899 ENTRY(iem.s.CodeTlb.uTlbRevision),
8900 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8901 ENTRY(iem.s.CodeTlb.cTlbHits),
8902 ENTRY(pVMR3),
8903 ENTRY(cpum.GstCtx.rax),
8904 ENTRY(cpum.GstCtx.ah),
8905 ENTRY(cpum.GstCtx.rcx),
8906 ENTRY(cpum.GstCtx.ch),
8907 ENTRY(cpum.GstCtx.rdx),
8908 ENTRY(cpum.GstCtx.dh),
8909 ENTRY(cpum.GstCtx.rbx),
8910 ENTRY(cpum.GstCtx.bh),
8911 ENTRY(cpum.GstCtx.rsp),
8912 ENTRY(cpum.GstCtx.rbp),
8913 ENTRY(cpum.GstCtx.rsi),
8914 ENTRY(cpum.GstCtx.rdi),
8915 ENTRY(cpum.GstCtx.r8),
8916 ENTRY(cpum.GstCtx.r9),
8917 ENTRY(cpum.GstCtx.r10),
8918 ENTRY(cpum.GstCtx.r11),
8919 ENTRY(cpum.GstCtx.r12),
8920 ENTRY(cpum.GstCtx.r13),
8921 ENTRY(cpum.GstCtx.r14),
8922 ENTRY(cpum.GstCtx.r15),
8923 ENTRY(cpum.GstCtx.es.Sel),
8924 ENTRY(cpum.GstCtx.es.u64Base),
8925 ENTRY(cpum.GstCtx.es.u32Limit),
8926 ENTRY(cpum.GstCtx.es.Attr),
8927 ENTRY(cpum.GstCtx.cs.Sel),
8928 ENTRY(cpum.GstCtx.cs.u64Base),
8929 ENTRY(cpum.GstCtx.cs.u32Limit),
8930 ENTRY(cpum.GstCtx.cs.Attr),
8931 ENTRY(cpum.GstCtx.ss.Sel),
8932 ENTRY(cpum.GstCtx.ss.u64Base),
8933 ENTRY(cpum.GstCtx.ss.u32Limit),
8934 ENTRY(cpum.GstCtx.ss.Attr),
8935 ENTRY(cpum.GstCtx.ds.Sel),
8936 ENTRY(cpum.GstCtx.ds.u64Base),
8937 ENTRY(cpum.GstCtx.ds.u32Limit),
8938 ENTRY(cpum.GstCtx.ds.Attr),
8939 ENTRY(cpum.GstCtx.fs.Sel),
8940 ENTRY(cpum.GstCtx.fs.u64Base),
8941 ENTRY(cpum.GstCtx.fs.u32Limit),
8942 ENTRY(cpum.GstCtx.fs.Attr),
8943 ENTRY(cpum.GstCtx.gs.Sel),
8944 ENTRY(cpum.GstCtx.gs.u64Base),
8945 ENTRY(cpum.GstCtx.gs.u32Limit),
8946 ENTRY(cpum.GstCtx.gs.Attr),
8947 ENTRY(cpum.GstCtx.rip),
8948 ENTRY(cpum.GstCtx.eflags),
8949 ENTRY(cpum.GstCtx.uRipInhibitInt),
8950 ENTRY(cpum.GstCtx.cr0),
8951 ENTRY(cpum.GstCtx.cr4),
8952 ENTRY(cpum.GstCtx.aXcr[0]),
8953 ENTRY(cpum.GstCtx.aXcr[1]),
8954#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8955 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8956 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8957 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8958 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8959 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8960 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8961 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8962 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8963 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8964 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8965 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8966 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8967 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8968 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8969 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8970 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8971 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8972 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8973 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8974 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8975 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8976 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8977 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8978 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8979 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8980 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8981 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8982 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8983 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8984 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8985 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8986 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8987#endif
8988#undef ENTRY
8989 };
8990#ifdef VBOX_STRICT
8991 static bool s_fOrderChecked = false;
8992 if (!s_fOrderChecked)
8993 {
8994 s_fOrderChecked = true;
8995 uint32_t offPrev = s_aMembers[0].off;
8996 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8997 {
8998 Assert(s_aMembers[i].off > offPrev);
8999 offPrev = s_aMembers[i].off;
9000 }
9001 }
9002#endif
9003
9004 /*
9005 * Binary lookup.
9006 */
9007 unsigned iStart = 0;
9008 unsigned iEnd = RT_ELEMENTS(s_aMembers);
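    /* The search interval shrinks every iteration (or we break out), so the loop
       terminates; a miss falls through to the checks / NULL return below. */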
9009 for (;;)
9010 {
9011 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9012 uint32_t const offCur = s_aMembers[iCur].off;
9013 if (off < offCur)
9014 {
9015 if (iCur != iStart)
9016 iEnd = iCur;
9017 else
9018 break;
9019 }
9020 else if (off > offCur)
9021 {
9022 if (iCur + 1 < iEnd)
9023 iStart = iCur + 1;
9024 else
9025 break;
9026 }
9027 else
9028 return s_aMembers[iCur].pszName;
9029 }
9030#ifdef VBOX_WITH_STATISTICS
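    /* Single-compare range check: if off is below the array start, the unsigned
       subtraction wraps around and exceeds the member size. */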
9031 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9032 return "iem.s.acThreadedFuncStats[iFn]";
9033#endif
9034 return NULL;
9035}
9036
9037
9038DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9039{
9040 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9041#if defined(RT_ARCH_AMD64)
9042 static const char * const a_apszMarkers[] =
9043 {
9044 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9045 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9046 };
9047#endif
9048
9049 char szDisBuf[512];
9050 DISSTATE Dis;
9051 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9052 uint32_t const cNative = pTb->Native.cInstructions;
9053 uint32_t offNative = 0;
9054#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9055 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9056#endif
9057 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9058 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9059 : DISCPUMODE_64BIT;
9060#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9061 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9062#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9063 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9064#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9065# error "Port me"
9066#else
9067 csh hDisasm = ~(size_t)0;
9068# if defined(RT_ARCH_AMD64)
9069 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9070# elif defined(RT_ARCH_ARM64)
9071 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9072# else
9073# error "Port me"
9074# endif
9075 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9076
9077 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9078 //Assert(rcCs == CS_ERR_OK);
9079#endif
9080
9081 /*
9082 * Print TB info.
9083 */
9084 pHlp->pfnPrintf(pHlp,
9085 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9086 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9087 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9088 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9089#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9090 if (pDbgInfo && pDbgInfo->cEntries > 1)
9091 {
9092 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9093
9094 /*
9095 * This disassembly is driven by the debug info which follows the native
9096         * code and indicates where the next guest instruction starts, where
9097         * labels are, and other such things.
9098 */
9099 uint32_t idxThreadedCall = 0;
9100 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9101 uint8_t idxRange = UINT8_MAX;
9102 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9103 uint32_t offRange = 0;
9104 uint32_t offOpcodes = 0;
9105 uint32_t const cbOpcodes = pTb->cbOpcodes;
9106 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9107 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9108 uint32_t iDbgEntry = 1;
9109 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9110
9111 while (offNative < cNative)
9112 {
9113 /* If we're at or have passed the point where the next chunk of debug
9114 info starts, process it. */
9115 if (offDbgNativeNext <= offNative)
9116 {
9117 offDbgNativeNext = UINT32_MAX;
9118 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9119 {
9120 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9121 {
9122 case kIemTbDbgEntryType_GuestInstruction:
9123 {
9124 /* Did the exec flag change? */
9125 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9126 {
9127 pHlp->pfnPrintf(pHlp,
9128 " fExec change %#08x -> %#08x %s\n",
9129 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9130 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9131 szDisBuf, sizeof(szDisBuf)));
9132 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9133 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9134 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9135 : DISCPUMODE_64BIT;
9136 }
9137
9138                            /* New opcode range?  We need to cope with a spurious debug info entry here for cases
9139 where the compilation was aborted before the opcode was recorded and the actual
9140 instruction was translated to a threaded call. This may happen when we run out
9141 of ranges, or when some complicated interrupts/FFs are found to be pending or
9142 similar. So, we just deal with it here rather than in the compiler code as it
9143 is a lot simpler to do here. */
9144 if ( idxRange == UINT8_MAX
9145 || idxRange >= cRanges
9146 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9147 {
9148 idxRange += 1;
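                                /* Rebase offRange into the new range (idxRange wraps from UINT8_MAX to 0 for
                                   the first range); any overshoot from the previous range is carried over. */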
9149 if (idxRange < cRanges)
9150 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9151 else
9152 continue;
9153 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9154 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9155 + (pTb->aRanges[idxRange].idxPhysPage == 0
9156 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9157 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9158 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9159 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9160 pTb->aRanges[idxRange].idxPhysPage);
9161 GCPhysPc += offRange;
9162 }
9163
9164 /* Disassemble the instruction. */
9165 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9166 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9167 uint32_t cbInstr = 1;
9168 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9169 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9170 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9171 if (RT_SUCCESS(rc))
9172 {
9173 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9174 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9175 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9176 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9177
9178 static unsigned const s_offMarker = 55;
9179 static char const s_szMarker[] = " ; <--- guest";
9180 if (cch < s_offMarker)
9181 {
9182 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9183 cch = s_offMarker;
9184 }
9185 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9186 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9187
9188 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9189 }
9190 else
9191 {
9192 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9193 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9194 cbInstr = 1;
9195 }
9196 GCPhysPc += cbInstr;
9197 offOpcodes += cbInstr;
9198 offRange += cbInstr;
9199 continue;
9200 }
9201
9202 case kIemTbDbgEntryType_ThreadedCall:
9203 pHlp->pfnPrintf(pHlp,
9204 " Call #%u to %s (%u args) - %s\n",
9205 idxThreadedCall,
9206 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9207 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9208 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9209 idxThreadedCall++;
9210 continue;
9211
9212 case kIemTbDbgEntryType_GuestRegShadowing:
9213 {
9214 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9215 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9216 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9217 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9218 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9219 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9220 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
9221 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9222 else
9223 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9224 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9225 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9226 continue;
9227 }
9228
9229#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9230 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9231 {
9232 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9233 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9234 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9235 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9236 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9237 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9238 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9239 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9240 else
9241 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9242 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9243 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9244 continue;
9245 }
9246#endif
9247
9248 case kIemTbDbgEntryType_Label:
9249 {
9250 const char *pszName = "what_the_fudge";
9251 const char *pszComment = "";
9252 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9253 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9254 {
9255 case kIemNativeLabelType_Return: pszName = "Return"; break;
9256 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9257 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9258 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9259 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9260 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9261 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9262 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9263 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
9264 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9265 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9266 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9267 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9268 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9269 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9270 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9271 case kIemNativeLabelType_If:
9272 pszName = "If";
9273 fNumbered = true;
9274 break;
9275 case kIemNativeLabelType_Else:
9276 pszName = "Else";
9277 fNumbered = true;
9278 pszComment = " ; regs state restored pre-if-block";
9279 break;
9280 case kIemNativeLabelType_Endif:
9281 pszName = "Endif";
9282 fNumbered = true;
9283 break;
9284 case kIemNativeLabelType_CheckIrq:
9285 pszName = "CheckIrq_CheckVM";
9286 fNumbered = true;
9287 break;
9288 case kIemNativeLabelType_TlbLookup:
9289 pszName = "TlbLookup";
9290 fNumbered = true;
9291 break;
9292 case kIemNativeLabelType_TlbMiss:
9293 pszName = "TlbMiss";
9294 fNumbered = true;
9295 break;
9296 case kIemNativeLabelType_TlbDone:
9297 pszName = "TlbDone";
9298 fNumbered = true;
9299 break;
9300 case kIemNativeLabelType_Invalid:
9301 case kIemNativeLabelType_End:
9302 break;
9303 }
9304 if (fNumbered)
9305 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9306 else
9307 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9308 continue;
9309 }
9310
9311 case kIemTbDbgEntryType_NativeOffset:
9312 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9313 Assert(offDbgNativeNext > offNative);
9314 break;
9315
9316#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9317 case kIemTbDbgEntryType_DelayedPcUpdate:
9318 pHlp->pfnPrintf(pHlp,
9319 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9320 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9321 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9322 continue;
9323#endif
9324
9325 default:
9326 AssertFailed();
9327 }
9328 iDbgEntry++;
9329 break;
9330 }
9331 }
9332
9333 /*
9334 * Disassemble the next native instruction.
9335 */
9336 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9337# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9338 uint32_t cbInstr = sizeof(paNative[0]);
9339 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9340 if (RT_SUCCESS(rc))
9341 {
9342# if defined(RT_ARCH_AMD64)
9343 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9344 {
9345 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9346 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9347 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9348 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9349 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9350 uInfo & 0x8000 ? "recompiled" : "todo");
9351 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9352 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9353 else
9354 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9355 }
9356 else
9357# endif
9358 {
9359 const char *pszAnnotation = NULL;
9360# ifdef RT_ARCH_AMD64
9361 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9362 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9363 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9364 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9365 PCDISOPPARAM pMemOp;
9366 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9367 pMemOp = &Dis.Param1;
9368 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9369 pMemOp = &Dis.Param2;
9370 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9371 pMemOp = &Dis.Param3;
9372 else
9373 pMemOp = NULL;
9374 if ( pMemOp
9375 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9376 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9377 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9378 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9379
9380#elif defined(RT_ARCH_ARM64)
9381 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9382 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9383 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9384# else
9385# error "Port me"
9386# endif
9387 if (pszAnnotation)
9388 {
9389 static unsigned const s_offAnnotation = 55;
9390 size_t const cchAnnotation = strlen(pszAnnotation);
9391 size_t cchDis = strlen(szDisBuf);
9392 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9393 {
9394 if (cchDis < s_offAnnotation)
9395 {
9396 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9397 cchDis = s_offAnnotation;
9398 }
9399 szDisBuf[cchDis++] = ' ';
9400 szDisBuf[cchDis++] = ';';
9401 szDisBuf[cchDis++] = ' ';
9402 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9403 }
9404 }
9405 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9406 }
9407 }
9408 else
9409 {
9410# if defined(RT_ARCH_AMD64)
9411 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9412 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9413# elif defined(RT_ARCH_ARM64)
9414 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9415# else
9416# error "Port me"
9417# endif
9418 cbInstr = sizeof(paNative[0]);
9419 }
9420 offNative += cbInstr / sizeof(paNative[0]);
9421
9422# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9423 cs_insn *pInstr;
9424 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9425 (uintptr_t)pNativeCur, 1, &pInstr);
9426 if (cInstrs > 0)
9427 {
9428 Assert(cInstrs == 1);
9429 const char *pszAnnotation = NULL;
9430# if defined(RT_ARCH_ARM64)
9431 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9432 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9433 {
9434                /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9435 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9436 char *psz = strchr(pInstr->op_str, '[');
9437 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9438 {
9439                        uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9440 int32_t off = -1;
9441 psz += 4;
9442 if (*psz == ']')
9443 off = 0;
9444 else if (*psz == ',')
9445 {
9446 psz = RTStrStripL(psz + 1);
9447 if (*psz == '#')
9448 off = RTStrToInt32(&psz[1]);
9449 /** @todo deal with index registers and LSL as well... */
9450 }
9451 if (off >= 0)
9452 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9453 }
9454 }
9455# endif
9456
9457 size_t const cchOp = strlen(pInstr->op_str);
9458# if defined(RT_ARCH_AMD64)
9459 if (pszAnnotation)
9460 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9461 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9462 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9463 else
9464 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9465 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9466
9467# else
9468 if (pszAnnotation)
9469 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9470 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9471 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9472 else
9473 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9474 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9475# endif
9476 offNative += pInstr->size / sizeof(*pNativeCur);
9477 cs_free(pInstr, cInstrs);
9478 }
9479 else
9480 {
9481# if defined(RT_ARCH_AMD64)
9482 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9483                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9484# else
9485 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9486# endif
9487 offNative++;
9488 }
9489# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9490 }
9491 }
9492 else
9493#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9494 {
9495 /*
9496 * No debug info, just disassemble the x86 code and then the native code.
9497 *
9498 * First the guest code:
9499 */
9500 for (unsigned i = 0; i < pTb->cRanges; i++)
9501 {
9502 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9503 + (pTb->aRanges[i].idxPhysPage == 0
9504 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9505 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9506 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9507 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9508 unsigned off = pTb->aRanges[i].offOpcodes;
9509 /** @todo this ain't working when crossing pages! */
9510 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9511 while (off < cbOpcodes)
9512 {
9513 uint32_t cbInstr = 1;
9514 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9515 &pTb->pabOpcodes[off], cbOpcodes - off,
9516 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9517 if (RT_SUCCESS(rc))
9518 {
9519 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9520 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9521 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9522 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9523 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9524 GCPhysPc += cbInstr;
9525 off += cbInstr;
9526 }
9527 else
9528 {
9529 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9530 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9531 break;
9532 }
9533 }
9534 }
9535
9536 /*
9537 * Then the native code:
9538 */
9539 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9540 while (offNative < cNative)
9541 {
9542 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9543# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9544 uint32_t cbInstr = sizeof(paNative[0]);
9545 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9546 if (RT_SUCCESS(rc))
9547 {
9548# if defined(RT_ARCH_AMD64)
9549 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9550 {
9551 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9552 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9553 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9554 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9555 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9556 uInfo & 0x8000 ? "recompiled" : "todo");
9557 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9558 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9559 else
9560 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9561 }
9562 else
9563# endif
9564 {
9565# ifdef RT_ARCH_AMD64
9566 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9567 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9568 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9569 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9570# elif defined(RT_ARCH_ARM64)
9571 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9572 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9573 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9574# else
9575# error "Port me"
9576# endif
9577 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9578 }
9579 }
9580 else
9581 {
9582# if defined(RT_ARCH_AMD64)
9583 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9584 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9585# else
9586 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9587# endif
9588 cbInstr = sizeof(paNative[0]);
9589 }
9590 offNative += cbInstr / sizeof(paNative[0]);
9591
9592# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9593 cs_insn *pInstr;
9594 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9595 (uintptr_t)pNativeCur, 1, &pInstr);
9596 if (cInstrs > 0)
9597 {
9598 Assert(cInstrs == 1);
9599# if defined(RT_ARCH_AMD64)
9600 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9601 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9602# else
9603 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9604 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9605# endif
9606 offNative += pInstr->size / sizeof(*pNativeCur);
9607 cs_free(pInstr, cInstrs);
9608 }
9609 else
9610 {
9611# if defined(RT_ARCH_AMD64)
9612 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9613                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9614# else
9615 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9616# endif
9617 offNative++;
9618 }
9619# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9620 }
9621 }
9622
9623#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9624 /* Cleanup. */
9625 cs_close(&hDisasm);
9626#endif
9627}
9628
9629
9630/**
9631 * Recompiles the given threaded TB into a native one.
9632 *
9633 * In case of failure the translation block will be returned as-is.
9634 *
9635 * @returns pTb.
9636 * @param pVCpu The cross context virtual CPU structure of the calling
9637 * thread.
9638 * @param pTb The threaded translation to recompile to native.
9639 */
9640DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9641{
9642 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9643
9644 /*
9645 * The first time thru, we allocate the recompiler state, the other times
9646 * we just need to reset it before using it again.
9647 */
9648 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9649 if (RT_LIKELY(pReNative))
9650 iemNativeReInit(pReNative, pTb);
9651 else
9652 {
9653 pReNative = iemNativeInit(pVCpu, pTb);
9654 AssertReturn(pReNative, pTb);
9655 }
9656
9657#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9658 /*
9659 * First do liveness analysis. This is done backwards.
9660 */
9661 {
9662 uint32_t idxCall = pTb->Thrd.cCalls;
9663 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9664 { /* likely */ }
9665 else
9666 {
9667 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9668 while (idxCall > cAlloc)
9669 cAlloc *= 2;
9670 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9671 AssertReturn(pvNew, pTb);
9672 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9673 pReNative->cLivenessEntriesAlloc = cAlloc;
9674 }
9675 AssertReturn(idxCall > 0, pTb);
9676 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9677
9678 /* The initial (final) entry. */
9679 idxCall--;
9680 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9681
9682 /* Loop backwards thru the calls and fill in the other entries. */
9683 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9684 while (idxCall > 0)
9685 {
9686 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9687 if (pfnLiveness)
9688 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9689 else
9690 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9691 pCallEntry--;
9692 idxCall--;
9693 }
9694
9695# ifdef VBOX_WITH_STATISTICS
9696        /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9697           to 'clobbered' rather than 'input'. */
9698 /** @todo */
9699# endif
9700 }
9701#endif
9702
9703 /*
9704 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9705 * for aborting if an error happens.
9706 */
9707 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9708#ifdef LOG_ENABLED
9709 uint32_t const cCallsOrg = cCallsLeft;
9710#endif
9711 uint32_t off = 0;
9712 int rc = VINF_SUCCESS;
9713 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9714 {
9715 /*
9716 * Emit prolog code (fixed).
9717 */
9718 off = iemNativeEmitProlog(pReNative, off);
9719
9720 /*
9721 * Convert the calls to native code.
9722 */
9723#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9724 int32_t iGstInstr = -1;
9725#endif
9726#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9727 uint32_t cThreadedCalls = 0;
9728 uint32_t cRecompiledCalls = 0;
9729#endif
9730#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9731 uint32_t idxCurCall = 0;
9732#endif
9733 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9734 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9735 while (cCallsLeft-- > 0)
9736 {
9737 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9738#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9739 pReNative->idxCurCall = idxCurCall;
9740#endif
9741
9742 /*
9743 * Debug info, assembly markup and statistics.
9744 */
9745#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9746 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9747 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9748#endif
9749#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9750 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9751 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9752 {
9753 if (iGstInstr < (int32_t)pTb->cInstructions)
9754 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9755 else
9756 Assert(iGstInstr == pTb->cInstructions);
9757 iGstInstr = pCallEntry->idxInstr;
9758 }
9759 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9760#endif
9761#if defined(VBOX_STRICT)
9762 off = iemNativeEmitMarker(pReNative, off,
9763 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9764#endif
9765#if defined(VBOX_STRICT)
9766 iemNativeRegAssertSanity(pReNative);
9767#endif
9768#ifdef VBOX_WITH_STATISTICS
9769 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9770#endif
9771
9772 /*
9773 * Actual work.
9774 */
9775 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9776 pfnRecom ? "(recompiled)" : "(todo)"));
9777 if (pfnRecom) /** @todo stats on this. */
9778 {
9779 off = pfnRecom(pReNative, off, pCallEntry);
9780 STAM_REL_STATS({cRecompiledCalls++;});
9781 }
9782 else
9783 {
9784 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9785 STAM_REL_STATS({cThreadedCalls++;});
9786 }
9787 Assert(off <= pReNative->cInstrBufAlloc);
9788 Assert(pReNative->cCondDepth == 0);
9789
9790#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9791 if (LogIs2Enabled())
9792 {
9793 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9794# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9795 static const char s_achState[] = "CUXI";
9796# else
9797 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9798# endif
9799
9800 char szGpr[17];
9801 for (unsigned i = 0; i < 16; i++)
9802 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9803 szGpr[16] = '\0';
9804
9805 char szSegBase[X86_SREG_COUNT + 1];
9806 char szSegLimit[X86_SREG_COUNT + 1];
9807 char szSegAttrib[X86_SREG_COUNT + 1];
9808 char szSegSel[X86_SREG_COUNT + 1];
9809 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9810 {
9811 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9812 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9813 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9814 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9815 }
9816 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9817 = szSegSel[X86_SREG_COUNT] = '\0';
9818
9819 char szEFlags[8];
9820 for (unsigned i = 0; i < 7; i++)
9821 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9822 szEFlags[7] = '\0';
9823
9824 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9825 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9826 }
9827#endif
9828
9829 /*
9830 * Advance.
9831 */
9832 pCallEntry++;
9833#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9834 idxCurCall++;
9835#endif
9836 }
9837
9838 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9839 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9840 if (!cThreadedCalls)
9841 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9842
9843 /*
9844 * Emit the epilog code.
9845 */
9846 uint32_t idxReturnLabel;
9847 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9848
9849 /*
9850 * Generate special jump labels.
9851 */
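/* Only label types that were actually requested during recompilation get a
   body emitted here; pReNative->bmLabelTypes tracks which ones were used. */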
9852 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9853 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9854 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9855 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9856
9857 /*
9858 * Generate simple TB tail labels that just call a helper with a pVCpu
9859 * arg and either return or longjmp/throw a non-zero status.
9860 *
9861 * The array entries must be ordered by enmLabel value so we can index
9862 * using fTailLabels bit numbers.
9863 */
9864 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9865 static struct
9866 {
9867 IEMNATIVELABELTYPE enmLabel;
9868 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9869 } const g_aSimpleTailLabels[] =
9870 {
9871 { kIemNativeLabelType_Invalid, NULL },
9872 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9873 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9874 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9875 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9876 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9877 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9878 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9879 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9880 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9881 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9882 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9883 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9884 };
9885 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9886 AssertCompile(kIemNativeLabelType_Invalid == 0);
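/* RT_BIT_64(LastSimple + 1U) - 2U sets bits 1..LastSimple, i.e. every simple
   tail label type except bit 0 (kIemNativeLabelType_Invalid). */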
9887 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9888 if (fTailLabels)
9889 {
9890 do
9891 {
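/* ASMBitFirstSetU64 returns a 1-based bit index (0 if no bit is set), hence the '- 1U'. */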
9892 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9893 fTailLabels &= ~RT_BIT_64(enmLabel);
9894 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9895
9896 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9897 Assert(idxLabel != UINT32_MAX);
9898 if (idxLabel != UINT32_MAX)
9899 {
9900 iemNativeLabelDefine(pReNative, idxLabel, off);
9901
9902 /* int pfnCallback(PVMCPUCC pVCpu) */
9903 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9904 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9905
9906 /* jump back to the return sequence. */
9907 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9908 }
9909
9910 } while (fTailLabels);
9911 }
9912 }
9913 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9914 {
9915 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9916 return pTb;
9917 }
9918 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9919 Assert(off <= pReNative->cInstrBufAlloc);
9920
9921 /*
9922 * Make sure all labels have been defined.
9923 */
9924 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9925#ifdef VBOX_STRICT
9926 uint32_t const cLabels = pReNative->cLabels;
9927 for (uint32_t i = 0; i < cLabels; i++)
9928 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9929#endif
9930
9931 /*
9932 * Allocate executable memory, copy over the code we've generated.
9933 */
9934 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
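/* Flush any delayed TB frees first - presumably so the executable memory they
   still hold can be reused by the allocation below. */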
9935 if (pTbAllocator->pDelayedFreeHead)
9936 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9937
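/* Note: off counts IEMNATIVEINSTR units rather than bytes - going by the
   IEMNATIVEINSTR typedef that is a byte on AMD64 and a 32-bit instruction
   word on ARM64 - hence the sizeof() scaling here and below. */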
9938 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9939 AssertReturn(paFinalInstrBuf, pTb);
9940 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9941
9942 /*
9943 * Apply fixups.
9944 */
9945 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9946 uint32_t const cFixups = pReNative->cFixups;
9947 for (uint32_t i = 0; i < cFixups; i++)
9948 {
9949 Assert(paFixups[i].off < off);
9950 Assert(paFixups[i].idxLabel < cLabels);
9951 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9952 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9953 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9954 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9955 switch (paFixups[i].enmType)
9956 {
9957#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9958 case kIemNativeFixupType_Rel32:
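/* 32-bit displacement relative to the fixup position plus the addend; the
   addend presumably compensates for x86 rel32 being relative to the end of
   the instruction (e.g. -4 when the immediate is its last four bytes). */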
9959 Assert(paFixups[i].off + 4 <= off);
9960 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9961 continue;
9962
9963#elif defined(RT_ARCH_ARM64)
9964 case kIemNativeFixupType_RelImm26At0:
9965 {
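/* ARM64 B/BL: signed 26-bit word displacement in bits 0..25. */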
9966 Assert(paFixups[i].off < off);
9967 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9968 Assert(offDisp >= -262144 && offDisp < 262144);
9969 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9970 continue;
9971 }
9972
9973 case kIemNativeFixupType_RelImm19At5:
9974 {
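/* ARM64 B.cond/CBZ/CBNZ style: signed 19-bit word displacement in bits 5..23. */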
9975 Assert(paFixups[i].off < off);
9976 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9977 Assert(offDisp >= -262144 && offDisp < 262144);
9978 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9979 continue;
9980 }
9981
9982 case kIemNativeFixupType_RelImm14At5:
9983 {
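/* ARM64 TBZ/TBNZ: signed 14-bit word displacement in bits 5..18. */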
9984 Assert(paFixups[i].off < off);
9985 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9986 Assert(offDisp >= -8192 && offDisp < 8192);
9987 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9988 continue;
9989 }
9990
9991#endif
9992 case kIemNativeFixupType_Invalid:
9993 case kIemNativeFixupType_End:
9994 break;
9995 }
9996 AssertFailed();
9997 }
9998
9999 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
10000 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10001
10002 /*
10003 * Convert the translation block.
10004 */
10005 RTMemFree(pTb->Thrd.paCalls);
10006 pTb->Native.paInstructions = paFinalInstrBuf;
10007 pTb->Native.cInstructions = off;
10008 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10009#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10010 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10011 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10012#endif
10013
10014 Assert(pTbAllocator->cThreadedTbs > 0);
10015 pTbAllocator->cThreadedTbs -= 1;
10016 pTbAllocator->cNativeTbs += 1;
10017 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10018
10019#ifdef LOG_ENABLED
10020 /*
10021 * Disassemble to the log if enabled.
10022 */
10023 if (LogIs3Enabled())
10024 {
10025 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10026 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10027# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10028 RTLogFlush(NULL);
10029# endif
10030 }
10031#endif
10032 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10033
10034 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10035 return pTb;
10036}
10037