source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103838

Last change on this file was in revision 103838, checked in by vboxsync, 11 months ago

VMM/IEM: Emit the IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE()/IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT()/IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() checks only once per TB or after a helper was called which could modify cr0/cr4/xcr0. Most of the time the check can be omitted for SIMD code, bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103838 2024-03-13 20:06:55Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using the RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
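/* Note: these two must stay in sync: IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE must
   equal RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT), as the allocator
   converts between byte counts and allocation units by shifting. */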
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
276 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity/laziness, they are allocated as one
339 * contiguous block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
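/* Note: the allocator lives in a single RTMemAllocZ'ed block: this structure
   with its flexible aChunks array, followed (cache line aligned) by the
   per-chunk allocation bitmaps and, on non-windows ring-3 hosts, the
   IEMEXECMEMCHUNKEHFRAME array. See iemExecMemAllocatorInit. */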
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
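/**
 * Scans the allocation bitmap of chunk @a idxChunk for @a cReqUnits
 * consecutive free units, starting at @a idxFirst and covering @a cToScan
 * bits, marking them as allocated and returning their address on success.
 *
 * @returns Pointer to the allocated memory, NULL if no sufficiently long run
 *          of free units was found in the scanned range.
 */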
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
461
462
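/**
 * Tries to allocate @a cbReq bytes of executable memory from chunk @a idxChunk.
 *
 * @returns Pointer to the allocated memory, NULL if the chunk cannot satisfy
 *          the request.
 */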
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
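/* E.g. a 300 byte request rounds up to 3 units (384 bytes) with the default 128 byte unit size. */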
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
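/* Note: on darwin the caller is expected to write the recompiled code and then
   call this before executing it, since iemExecMemAllocatorAlloc hands out the
   memory as read+write there (see iemExecMemAllocatorAllocTailCode). */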
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
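 *
 * For example, -8 encodes as the single byte 0x78, while 100 encodes as
 * 0xe4 0x00 (low 7 bits first, with the continuation bit set on the first byte).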
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits a ULEB128 encoded value (up to 64 bits wide).
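 *
 * For example, 0x7f encodes as a single byte, while 300 encodes as 0xac 0x02.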
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
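 *
 * For example, with the data alignment factor of -8 established by the CIE
 * generated below, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) records that
 * RBP was saved at CFA - 16.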
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
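/* abEhFrame now holds the CIE, an FDE covering the whole chunk, and a zero terminator entry. */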
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on a 64 byte boundary, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
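/* Examples: cbMax = 64 MiB gives 16 MiB chunks, while anything from 256 MiB up uses the 64 MiB maximum chunk size. */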
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
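 /* Reserve room for one allocation bitmap per chunk: one bit per allocation unit,
 eight bits per byte (hence the +3 below). */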
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
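 /* (The +6 above is because each pbmAlloc element is a 64-bit word covering 64 allocation units.) */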
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
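
/*
 * Illustrative usage sketch (arbitrary example sizes; not a prescribed configuration):
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);
 *      AssertRCReturn(rc, rc);
 *
 * With cbChunk=0 the default path above picks 64M/4 = 16M as the chunk size, which is already
 * a power of two, so cbMax stays at 64M and cMaxChunks comes out as 4; the initial allocation
 * loop then grows the allocator by one 16M chunk to cover cbInitial.
 */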
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#GP(0).
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#NM.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseDeviceNotAvailableJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise a \#UD.
1607 */
1608IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1609{
1610 iemRaiseUndefinedOpcodeJmp(pVCpu);
1611#ifndef _MSC_VER
1612 return VINF_IEM_RAISED_XCPT; /* not reached */
1613#endif
1614}
1615
1616
1617/**
1618 * Used by TB code when it wants to raise a \#MF.
1619 */
1620IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1621{
1622 iemRaiseMathFaultJmp(pVCpu);
1623#ifndef _MSC_VER
1624 return VINF_IEM_RAISED_XCPT; /* not reached */
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code when it wants to raise a \#XF.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1633{
1634 iemRaiseSimdFpExceptionJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when detecting opcode changes.
1643 * @see iemThreadeFuncWorkerObsoleteTb
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1646{
1647 /* We set fSafeToFree to false because we're being called in the context
1648 of a TB callback function, which for native TBs means we cannot release
1649 the executable memory until we've returned all the way back to iemTbExec,
1650 as that return path goes via the native code generated for the TB. */
1651 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1652 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1653 return VINF_IEM_REEXEC_BREAK;
1654}
1655
1656
1657/**
1658 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1661{
1662 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1663 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1664 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1665 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1666 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1667 return VINF_IEM_REEXEC_BREAK;
1668}
1669
1670
1671/**
1672 * Used by TB code when we missed a PC check after a branch.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1675{
1676 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1677 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1678 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1679 pVCpu->iem.s.pbInstrBuf));
1680 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1681 return VINF_IEM_REEXEC_BREAK;
1682}
1683
1684
1685
1686/*********************************************************************************************************************************
1687* Helpers: Segmented memory fetches and stores. *
1688*********************************************************************************************************************************/
1689
1690/**
1691 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1694{
1695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1696 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1697#else
1698 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1699#endif
1700}
1701
1702
1703/**
1704 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1705 * to 16 bits.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1708{
1709#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1710 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1711#else
1712 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1713#endif
1714}
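/* Example of the cast chain above: an input byte of 0x80 sign-extends to the 16-bit value
   0xff80, which the outer casts then zero-extend to 0x000000000000ff80 in the returned
   64-bit value; the wider fetch variants below use the same pattern. */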
1715
1716
1717/**
1718 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1719 * to 32 bits.
1720 */
1721IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1722{
1723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1724 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1725#else
1726 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1727#endif
1728}
1729
1730/**
1731 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1732 * to 64 bits.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1735{
1736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1737 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1738#else
1739 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1740#endif
1741}
1742
1743
1744/**
1745 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1746 */
1747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1748{
1749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1750 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1751#else
1752 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1753#endif
1754}
1755
1756
1757/**
1758 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1759 * to 32 bits.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1764 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1765#else
1766 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1773 * to 64 bits.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1778 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1779#else
1780 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1791 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1792#else
1793 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1800 * to 64 bits.
1801 */
1802IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1803{
1804#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1805 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1806#else
1807 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1808#endif
1809}
1810
1811
1812/**
1813 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1814 */
1815IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1816{
1817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1818 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1819#else
1820 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1821#endif
1822}
1823
1824
1825/**
1826 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1831 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1832#else
1833 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1844 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1845#else
1846 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1857 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1858#else
1859 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1870 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1871#else
1872 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1873#endif
1874}
1875
1876
1877
1878/**
1879 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1884 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1885#else
1886 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1897 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1898#else
1899 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to store a 32-bit selector value onto a generic stack.
1906 *
1907 * Intel CPUs don't write a whole dword, hence the special function.
1908 */
1909IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1910{
1911#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1912 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1913#else
1914 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1915#endif
1916}
1917
1918
1919/**
1920 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1921 */
1922IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1923{
1924#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1925 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1926#else
1927 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1928#endif
1929}
1930
1931
1932/**
1933 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1934 */
1935IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1936{
1937#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1938 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1939#else
1940 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1941#endif
1942}
1943
1944
1945/**
1946 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1947 */
1948IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1949{
1950#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1951 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1952#else
1953 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1954#endif
1955}
1956
1957
1958/**
1959 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1960 */
1961IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1962{
1963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1964 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1965#else
1966 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1967#endif
1968}
1969
1970
1971
1972/*********************************************************************************************************************************
1973* Helpers: Flat memory fetches and stores. *
1974*********************************************************************************************************************************/
1975
1976/**
1977 * Used by TB code to load unsigned 8-bit data w/ flat address.
1978 * @note Zero extending the value to 64-bit to simplify assembly.
1979 */
1980IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1981{
1982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1983 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1984#else
1985 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1986#endif
1987}
1988
1989
1990/**
1991 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1992 * to 16 bits.
1993 * @note Zero extending the value to 64-bit to simplify assembly.
1994 */
1995IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1996{
1997#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1998 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1999#else
2000 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2001#endif
2002}
2003
2004
2005/**
2006 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2007 * to 32 bits.
2008 * @note Zero extending the value to 64-bit to simplify assembly.
2009 */
2010IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2011{
2012#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2013 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2014#else
2015 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2016#endif
2017}
2018
2019
2020/**
2021 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2022 * to 64 bits.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2025{
2026#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2027 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2028#else
2029 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2030#endif
2031}
2032
2033
2034/**
2035 * Used by TB code to load unsigned 16-bit data w/ flat address.
2036 * @note Zero extending the value to 64-bit to simplify assembly.
2037 */
2038IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2039{
2040#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2041 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2042#else
2043 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2044#endif
2045}
2046
2047
2048/**
2049 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2050 * to 32 bits.
2051 * @note Zero extending the value to 64-bit to simplify assembly.
2052 */
2053IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2054{
2055#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2056 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2057#else
2058 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2059#endif
2060}
2061
2062
2063/**
2064 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2065 * to 64 bits.
2066 * @note Zero extending the value to 64-bit to simplify assembly.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to load unsigned 32-bit data w/ flat address.
2080 * @note Zero extending the value to 64-bit to simplify assembly.
2081 */
2082IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2083{
2084#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2085 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2086#else
2087 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2088#endif
2089}
2090
2091
2092/**
2093 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2094 * to 64 bits.
2095 * @note Zero extending the value to 64-bit to simplify assembly.
2096 */
2097IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2098{
2099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2100 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2101#else
2102 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2103#endif
2104}
2105
2106
2107/**
2108 * Used by TB code to load unsigned 64-bit data w/ flat address.
2109 */
2110IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2111{
2112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2113 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2114#else
2115 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2116#endif
2117}
2118
2119
2120/**
2121 * Used by TB code to store unsigned 8-bit data w/ flat address.
2122 */
2123IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2124{
2125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2126 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2127#else
2128 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2129#endif
2130}
2131
2132
2133/**
2134 * Used by TB code to store unsigned 16-bit data w/ flat address.
2135 */
2136IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2137{
2138#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2139 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2140#else
2141 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2142#endif
2143}
2144
2145
2146/**
2147 * Used by TB code to store unsigned 32-bit data w/ flat address.
2148 */
2149IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2150{
2151#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2152 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2153#else
2154 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2155#endif
2156}
2157
2158
2159/**
2160 * Used by TB code to store unsigned 64-bit data w/ flat address.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2165 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2166#else
2167 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2168#endif
2169}
2170
2171
2172
2173/**
2174 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2175 */
2176IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2177{
2178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2179 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2180#else
2181 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2182#endif
2183}
2184
2185
2186/**
2187 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2188 */
2189IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2190{
2191#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2192 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2193#else
2194 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2195#endif
2196}
2197
2198
2199/**
2200 * Used by TB code to store a segment selector value onto a flat stack.
2201 *
2202 * Intel CPUs don't write a whole dword, hence the special function.
2203 */
2204IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2205{
2206#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2207 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2208#else
2209 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2210#endif
2211}
2212
2213
2214/**
2215 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2216 */
2217IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2218{
2219#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2220 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2221#else
2222 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2223#endif
2224}
2225
2226
2227/**
2228 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2229 */
2230IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2231{
2232#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2233 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2234#else
2235 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2236#endif
2237}
2238
2239
2240/**
2241 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2242 */
2243IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2244{
2245#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2246 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2247#else
2248 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2249#endif
2250}
2251
2252
2253/**
2254 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2255 */
2256IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2257{
2258#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2259 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2260#else
2261 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2262#endif
2263}
2264
2265
2266
2267/*********************************************************************************************************************************
2268* Helpers: Segmented memory mapping. *
2269*********************************************************************************************************************************/
2270
2271/**
2272 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2273 * segmentation.
2274 */
2275IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2276 RTGCPTR GCPtrMem, uint8_t iSegReg))
2277{
2278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2279 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2280#else
2281 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2282#endif
2283}
2284
2285
2286/**
2287 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2288 */
2289IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2290 RTGCPTR GCPtrMem, uint8_t iSegReg))
2291{
2292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2293 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2294#else
2295 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2296#endif
2297}
2298
2299
2300/**
2301 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2302 */
2303IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2304 RTGCPTR GCPtrMem, uint8_t iSegReg))
2305{
2306#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2307 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2308#else
2309 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2310#endif
2311}
2312
2313
2314/**
2315 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2316 */
2317IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2318 RTGCPTR GCPtrMem, uint8_t iSegReg))
2319{
2320#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2321 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2322#else
2323 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2324#endif
2325}
2326
2327
2328/**
2329 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2330 * segmentation.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2333 RTGCPTR GCPtrMem, uint8_t iSegReg))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2336 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#else
2338 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2345 */
2346IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2347 RTGCPTR GCPtrMem, uint8_t iSegReg))
2348{
2349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2350 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2351#else
2352 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2353#endif
2354}
2355
2356
2357/**
2358 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2361 RTGCPTR GCPtrMem, uint8_t iSegReg))
2362{
2363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2364 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2365#else
2366 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2367#endif
2368}
2369
2370
2371/**
2372 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2373 */
2374IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2375 RTGCPTR GCPtrMem, uint8_t iSegReg))
2376{
2377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2378 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2379#else
2380 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2381#endif
2382}
2383
2384
2385/**
2386 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2387 * segmentation.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2390 RTGCPTR GCPtrMem, uint8_t iSegReg))
2391{
2392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2393 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2394#else
2395 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2396#endif
2397}
2398
2399
2400/**
2401 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2402 */
2403IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2404 RTGCPTR GCPtrMem, uint8_t iSegReg))
2405{
2406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2407 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2408#else
2409 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2410#endif
2411}
2412
2413
2414/**
2415 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2416 */
2417IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2418 RTGCPTR GCPtrMem, uint8_t iSegReg))
2419{
2420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2421 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2422#else
2423 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2424#endif
2425}
2426
2427
2428/**
2429 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2430 */
2431IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2432 RTGCPTR GCPtrMem, uint8_t iSegReg))
2433{
2434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2435 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2436#else
2437 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2438#endif
2439}
2440
2441
2442/**
2443 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2444 * segmentation.
2445 */
2446IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2447 RTGCPTR GCPtrMem, uint8_t iSegReg))
2448{
2449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2450 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2451#else
2452 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2453#endif
2454}
2455
2456
2457/**
2458 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2459 */
2460IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2461 RTGCPTR GCPtrMem, uint8_t iSegReg))
2462{
2463#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2464 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#else
2466 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#endif
2468}
2469
2470
2471/**
2472 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2475 RTGCPTR GCPtrMem, uint8_t iSegReg))
2476{
2477#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2478 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#else
2480 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#endif
2482}
2483
2484
2485/**
2486 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2487 */
2488IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2489 RTGCPTR GCPtrMem, uint8_t iSegReg))
2490{
2491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2492 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2493#else
2494 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2495#endif
2496}
2497
2498
2499/**
2500 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2501 */
2502IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2503 RTGCPTR GCPtrMem, uint8_t iSegReg))
2504{
2505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2506 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2507#else
2508 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2509#endif
2510}
2511
2512
2513/**
2514 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2515 */
2516IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2517 RTGCPTR GCPtrMem, uint8_t iSegReg))
2518{
2519#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2520 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2521#else
2522 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2523#endif
2524}
2525
2526
2527/**
2528 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2529 * segmentation.
2530 */
2531IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2532 RTGCPTR GCPtrMem, uint8_t iSegReg))
2533{
2534#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2535 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#else
2537 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#endif
2539}
2540
2541
2542/**
2543 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2544 */
2545IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2546 RTGCPTR GCPtrMem, uint8_t iSegReg))
2547{
2548#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2549 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2550#else
2551 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2552#endif
2553}
2554
2555
2556/**
2557 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2558 */
2559IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2560 RTGCPTR GCPtrMem, uint8_t iSegReg))
2561{
2562#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2563 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2564#else
2565 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2566#endif
2567}
2568
2569
2570/**
2571 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2572 */
2573IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2574 RTGCPTR GCPtrMem, uint8_t iSegReg))
2575{
2576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2577 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2578#else
2579 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2580#endif
2581}
2582
2583
2584/*********************************************************************************************************************************
2585* Helpers: Flat memory mapping. *
2586*********************************************************************************************************************************/
2587
2588/**
2589 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2590 * address.
2591 */
2592IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2593{
2594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2595 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2596#else
2597 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2598#endif
2599}
2600
2601
2602/**
2603 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2604 */
2605IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2606{
2607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2608 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2609#else
2610 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2611#endif
2612}
2613
2614
2615/**
2616 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2617 */
2618IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2619{
2620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2621 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2622#else
2623 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2624#endif
2625}
2626
2627
2628/**
2629 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2630 */
2631IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2632{
2633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2634 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2635#else
2636 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2637#endif
2638}
2639
2640
2641/**
2642 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2643 * address.
2644 */
2645IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2646{
2647#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2648 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2649#else
2650 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2651#endif
2652}
2653
2654
2655/**
2656 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2657 */
2658IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2659{
2660#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2661 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2662#else
2663 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2664#endif
2665}
2666
2667
2668/**
2669 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2670 */
2671IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2672{
2673#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2674 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2675#else
2676 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2677#endif
2678}
2679
2680
2681/**
2682 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2683 */
2684IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2685{
2686#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2687 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2688#else
2689 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2690#endif
2691}
2692
2693
2694/**
2695 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2696 * address.
2697 */
2698IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2699{
2700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2701 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2702#else
2703 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2704#endif
2705}
2706
2707
2708/**
2709 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2710 */
2711IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2715#else
2716 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2725{
2726#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2727 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2728#else
2729 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2730#endif
2731}
2732
2733
2734/**
2735 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2736 */
2737IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2738{
2739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2740 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2741#else
2742 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2743#endif
2744}
2745
2746
2747/**
2748 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2749 * address.
2750 */
2751IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2752{
2753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2754 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2755#else
2756 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2757#endif
2758}
2759
2760
2761/**
2762 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2763 */
2764IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2765{
2766#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2767 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2768#else
2769 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2770#endif
2771}
2772
2773
2774/**
2775 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2776 */
2777IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2778{
2779#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2780 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2781#else
2782 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2783#endif
2784}
2785
2786
2787/**
2788 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2789 */
2790IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2791{
2792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2793 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2794#else
2795 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2796#endif
2797}
2798
2799
2800/**
2801 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2802 */
2803IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2804{
2805#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2806 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2807#else
2808 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2809#endif
2810}
2811
2812
2813/**
2814 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2815 */
2816IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2817{
2818#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2819 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2820#else
2821 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2822#endif
2823}
2824
2825
2826/**
2827 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2828 * address.
2829 */
2830IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2831{
2832#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2833 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2834#else
2835 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2836#endif
2837}
2838
2839
2840/**
2841 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2842 */
2843IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2844{
2845#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2846 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2847#else
2848 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2849#endif
2850}
2851
2852
2853/**
2854 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2855 */
2856IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2857{
2858#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2859 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2860#else
2861 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2862#endif
2863}
2864
2865
2866/**
2867 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2868 */
2869IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2870{
2871#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2872 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2873#else
2874 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2875#endif
2876}
2877
2878
2879/*********************************************************************************************************************************
2880* Helpers: Commit, rollback & unmap *
2881*********************************************************************************************************************************/
2882
2883/**
2884 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2885 */
2886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2887{
2888 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2889}
2890
2891
2892/**
2893 * Used by TB code to commit and unmap a read-write memory mapping.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2896{
2897 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2898}
2899
2900
2901/**
2902 * Used by TB code to commit and unmap a write-only memory mapping.
2903 */
2904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2905{
2906 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2907}
2908
2909
2910/**
2911 * Used by TB code to commit and unmap a read-only memory mapping.
2912 */
2913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2914{
2915 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2916}
2917
2918
2919/**
2920 * Reinitializes the native recompiler state.
2921 *
2922 * Called before starting a new recompile job.
2923 */
2924static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2925{
2926 pReNative->cLabels = 0;
2927 pReNative->bmLabelTypes = 0;
2928 pReNative->cFixups = 0;
2929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2930 pReNative->pDbgInfo->cEntries = 0;
2931#endif
2932 pReNative->pTbOrg = pTb;
2933 pReNative->cCondDepth = 0;
2934 pReNative->uCondSeqNo = 0;
2935 pReNative->uCheckIrqSeqNo = 0;
2936 pReNative->uTlbSeqNo = 0;
2937
2938#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2939 pReNative->Core.offPc = 0;
2940 pReNative->Core.cInstrPcUpdateSkipped = 0;
2941#endif
2942#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2943 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2944#endif
2945 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2946#if IEMNATIVE_HST_GREG_COUNT < 32
2947 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2948#endif
2949 ;
2950 pReNative->Core.bmHstRegsWithGstShadow = 0;
2951 pReNative->Core.bmGstRegShadows = 0;
2952 pReNative->Core.bmVars = 0;
2953 pReNative->Core.bmStack = 0;
2954 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2955 pReNative->Core.u64ArgVars = UINT64_MAX;
2956
2957 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2958 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2959 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2960 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2961 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2962 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2963 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2964 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2965 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2971
2972 /* Full host register reinit: */
2973 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2974 {
2975 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2976 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2977 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2978 }
2979
2980 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2981 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2982#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2983 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2984#endif
2985#ifdef IEMNATIVE_REG_FIXED_TMP0
2986 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2987#endif
2988#ifdef IEMNATIVE_REG_FIXED_TMP1
2989 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2990#endif
2991#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2992 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2993#endif
2994 );
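 /* Mark any other fixed registers as reserved; the specifically named ones get their roles assigned right below. */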
2995 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2996 {
2997 fRegs &= ~RT_BIT_32(idxReg);
2998 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2999 }
3000
3001 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3002#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3003 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3004#endif
3005#ifdef IEMNATIVE_REG_FIXED_TMP0
3006 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3007#endif
3008#ifdef IEMNATIVE_REG_FIXED_TMP1
3009 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3010#endif
3011#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3012 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3013#endif
3014
3015#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3016# ifdef RT_ARCH_ARM64
3017 /*
3018 * Arm64 has only 32 128-bit registers; in order to support emulating 256-bit registers we statically
3019 * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3020 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register of each pair as fixed here
3021 * during init and the register allocator assumes that it will always be free when the lower one is picked.
3022 */
3023 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
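 /* 0xaaaaaaaa sets bits 1, 3, 5, ..., 31, i.e. the higher register of each pair: v1, v3, ..., v31. */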
3024# else
3025 uint32_t const fFixedAdditional = 0;
3026# endif
3027
3028 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3029 | fFixedAdditional
3030# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3031 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3032# endif
3033 ;
3034 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3035 pReNative->Core.bmGstSimdRegShadows = 0;
3036 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3037 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3038
3039 /* Full host register reinit: */
3040 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3041 {
3042 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3043 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3044 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3045 }
3046
3047 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3048 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3049 {
3050 fRegs &= ~RT_BIT_32(idxReg);
3051 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3052 }
3053
3054#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3055 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3056#endif
3057
3058#endif
3059
3060 return pReNative;
3061}
3062
3063
3064/**
3065 * Allocates and initializes the native recompiler state.
3066 *
3067 * This is called the first time an EMT wants to recompile something.
3068 *
3069 * @returns Pointer to the new recompiler state.
3070 * @param pVCpu The cross context virtual CPU structure of the calling
3071 * thread.
3072 * @param pTb The TB that's about to be recompiled.
3073 * @thread EMT(pVCpu)
3074 */
3075static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3076{
3077 VMCPU_ASSERT_EMT(pVCpu);
3078
3079 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3080 AssertReturn(pReNative, NULL);
3081
3082 /*
3083 * Try to allocate all the buffers and stuff we need.
3084 */
3085 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3086 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3087 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3088#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3089 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3090#endif
3091 if (RT_LIKELY( pReNative->pInstrBuf
3092 && pReNative->paLabels
3093 && pReNative->paFixups)
3094#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3095 && pReNative->pDbgInfo
3096#endif
3097 )
3098 {
3099 /*
3100 * Set the buffer & array sizes on success.
3101 */
3102 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3103 pReNative->cLabelsAlloc = _8K;
3104 pReNative->cFixupsAlloc = _16K;
3105#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3106 pReNative->cDbgInfoAlloc = _16K;
3107#endif
3108
3109 /* Other constant stuff: */
3110 pReNative->pVCpu = pVCpu;
3111
3112 /*
3113 * Done, just need to save it and reinit it.
3114 */
3115 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3116 return iemNativeReInit(pReNative, pTb);
3117 }
3118
3119 /*
3120 * Failed. Cleanup and return.
3121 */
3122 AssertFailed();
3123 RTMemFree(pReNative->pInstrBuf);
3124 RTMemFree(pReNative->paLabels);
3125 RTMemFree(pReNative->paFixups);
3126#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3127 RTMemFree(pReNative->pDbgInfo);
3128#endif
3129 RTMemFree(pReNative);
3130 return NULL;
3131}
3132
3133
3134/**
3135 * Creates a label
3136 *
3137 * If the label does not yet have a defined position,
3138 * call iemNativeLabelDefine() later to set it.
3139 *
3140 * @returns Label ID. Throws VBox status code on failure, so no need to check
3141 * the return value.
3142 * @param pReNative The native recompile state.
3143 * @param enmType The label type.
3144 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3145 * label is not yet defined (default).
3146 * @param   uData       Data associated with the label.  Only applicable to
3147 *                      certain types of labels.  Default is zero.
3148 */
3149DECL_HIDDEN_THROW(uint32_t)
3150iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3151 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3152{
3153 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3154
3155 /*
3156 * Locate existing label definition.
3157 *
3158 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3159 * and uData is zero.
3160 */
3161 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3162 uint32_t const cLabels = pReNative->cLabels;
3163 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3164#ifndef VBOX_STRICT
3165 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3166 && offWhere == UINT32_MAX
3167 && uData == 0
3168#endif
3169 )
3170 {
3171#ifndef VBOX_STRICT
3172 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3173 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3174 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3175 if (idxLabel < pReNative->cLabels)
3176 return idxLabel;
3177#else
3178 for (uint32_t i = 0; i < cLabels; i++)
3179 if ( paLabels[i].enmType == enmType
3180 && paLabels[i].uData == uData)
3181 {
3182 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3183 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3184 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3185 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3186 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3187 return i;
3188 }
3189 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3190 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3191#endif
3192 }
3193
3194 /*
3195 * Make sure we've got room for another label.
3196 */
3197 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3198 { /* likely */ }
3199 else
3200 {
3201 uint32_t cNew = pReNative->cLabelsAlloc;
3202 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3203 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3204 cNew *= 2;
3205        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3206 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3207 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3208 pReNative->paLabels = paLabels;
3209 pReNative->cLabelsAlloc = cNew;
3210 }
3211
3212 /*
3213 * Define a new label.
3214 */
3215 paLabels[cLabels].off = offWhere;
3216 paLabels[cLabels].enmType = enmType;
3217 paLabels[cLabels].uData = uData;
3218 pReNative->cLabels = cLabels + 1;
3219
3220 Assert((unsigned)enmType < 64);
3221 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3222
3223 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3224 {
3225 Assert(uData == 0);
3226 pReNative->aidxUniqueLabels[enmType] = cLabels;
3227 }
3228
3229 if (offWhere != UINT32_MAX)
3230 {
3231#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3232 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3233 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3234#endif
3235 }
3236 return cLabels;
3237}
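
/* A rough sketch of how label creation, fixups and label definition typically fit together; the
   concrete label/fixup type values are placeholders and not taken from any specific caller:

        uint32_t const idxLabel = iemNativeLabelCreate(pReNative, <label type>);   // forward declared label
        iemNativeAddFixup(pReNative, off, idxLabel, <fixup type>);                 // record branch to patch
        ...
        iemNativeLabelDefine(pReNative, idxLabel, off);                            // resolve the target later
*/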
3238
3239
3240/**
3241 * Defines the location of an existing label.
3242 *
3243 * @param pReNative The native recompile state.
3244 * @param idxLabel The label to define.
3245 * @param offWhere The position.
3246 */
3247DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3248{
3249 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3250 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3251 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3252 pLabel->off = offWhere;
3253#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3254 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3255 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3256#endif
3257}
3258
3259
3260/**
3261 * Looks up a label.
3262 *
3263 * @returns Label ID if found, UINT32_MAX if not.
3264 */
3265static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3266 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3267{
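    /* Note: an offWhere of UINT32_MAX matches any label position, and labels whose off is still
             UINT32_MAX (i.e. not yet defined) match any requested position, so forward declared
             labels can be looked up before they are defined. */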
3268 Assert((unsigned)enmType < 64);
3269 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3270 {
3271 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3272 return pReNative->aidxUniqueLabels[enmType];
3273
3274 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3275 uint32_t const cLabels = pReNative->cLabels;
3276 for (uint32_t i = 0; i < cLabels; i++)
3277 if ( paLabels[i].enmType == enmType
3278 && paLabels[i].uData == uData
3279 && ( paLabels[i].off == offWhere
3280 || offWhere == UINT32_MAX
3281 || paLabels[i].off == UINT32_MAX))
3282 return i;
3283 }
3284 return UINT32_MAX;
3285}
3286
3287
3288/**
3289 * Adds a fixup.
3290 *
3291 * @throws VBox status code (int) on failure.
3292 * @param pReNative The native recompile state.
3293 * @param offWhere The instruction offset of the fixup location.
3294 * @param idxLabel The target label ID for the fixup.
3295 * @param enmType The fixup type.
3296 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3297 */
3298DECL_HIDDEN_THROW(void)
3299iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3300 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3301{
3302 Assert(idxLabel <= UINT16_MAX);
3303 Assert((unsigned)enmType <= UINT8_MAX);
3304
3305 /*
3306 * Make sure we've room.
3307 */
3308 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3309 uint32_t const cFixups = pReNative->cFixups;
3310 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3311 { /* likely */ }
3312 else
3313 {
3314 uint32_t cNew = pReNative->cFixupsAlloc;
3315 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3316 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3317 cNew *= 2;
3318 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3319 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3320 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3321 pReNative->paFixups = paFixups;
3322 pReNative->cFixupsAlloc = cNew;
3323 }
3324
3325 /*
3326 * Add the fixup.
3327 */
3328 paFixups[cFixups].off = offWhere;
3329 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3330 paFixups[cFixups].enmType = enmType;
3331 paFixups[cFixups].offAddend = offAddend;
3332 pReNative->cFixups = cFixups + 1;
3333}
3334
3335
3336/**
3337 * Slow code path for iemNativeInstrBufEnsure.
3338 */
3339DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3340{
3341 /* Double the buffer size till we meet the request. */
3342 uint32_t cNew = pReNative->cInstrBufAlloc;
3343 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3344 do
3345 cNew *= 2;
3346 while (cNew < off + cInstrReq);
3347
3348 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3349#ifdef RT_ARCH_ARM64
3350 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3351#else
3352 uint32_t const cbMaxInstrBuf = _2M;
3353#endif
3354 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
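    /* Note: the cap matters because jumps within the TB use PC-relative encodings with limited
             reach; on ARM64 the conditional branch immediate is 19 bits scaled by 4, i.e. +/-1MB,
             hence the tighter limit above. */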
3355
3356 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3357 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3358
3359#ifdef VBOX_STRICT
3360 pReNative->offInstrBufChecked = off + cInstrReq;
3361#endif
3362 pReNative->cInstrBufAlloc = cNew;
3363 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3364}
3365
3366#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3367
3368/**
3369 * Grows the static debug info array used during recompilation.
3370 *
3371 * @returns Pointer to the new debug info block; throws VBox status code on
3372 * failure, so no need to check the return value.
3373 */
3374DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3375{
3376 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3377 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3378 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3379 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3380 pReNative->pDbgInfo = pDbgInfo;
3381 pReNative->cDbgInfoAlloc = cNew;
3382 return pDbgInfo;
3383}
3384
3385
3386/**
3387 * Adds a new debug info uninitialized entry, returning the pointer to it.
3388 */
3389DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3390{
3391 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3392 { /* likely */ }
3393 else
3394 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3395 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3396}
3397
3398
3399/**
3400 * Debug Info: Adds a native offset record, if necessary.
3401 */
3402DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3403{
3404 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3405
3406 /*
3407 * Search backwards to see if we've got a similar record already.
3408 */
3409 uint32_t idx = pDbgInfo->cEntries;
3410 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3411 while (idx-- > idxStop)
3412 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3413 {
3414 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3415 return;
3416 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3417 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3418 break;
3419 }
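    /* Note: only the last few entries are scanned; native offsets are recorded in ascending
             order, which the assertion above also checks. */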
3420
3421 /*
3422 * Add it.
3423 */
3424 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3425 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3426 pEntry->NativeOffset.offNative = off;
3427}
3428
3429
3430/**
3431 * Debug Info: Record info about a label.
3432 */
3433static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3434{
3435 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3436 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3437 pEntry->Label.uUnused = 0;
3438 pEntry->Label.enmLabel = (uint8_t)enmType;
3439 pEntry->Label.uData = uData;
3440}
3441
3442
3443/**
3444 * Debug Info: Record info about a threaded call.
3445 */
3446static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3447{
3448 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3449 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3450 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3451 pEntry->ThreadedCall.uUnused = 0;
3452 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3453}
3454
3455
3456/**
3457 * Debug Info: Record info about a new guest instruction.
3458 */
3459static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3460{
3461 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3462 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3463 pEntry->GuestInstruction.uUnused = 0;
3464 pEntry->GuestInstruction.fExec = fExec;
3465}
3466
3467
3468/**
3469 * Debug Info: Record info about guest register shadowing.
3470 */
3471DECL_HIDDEN_THROW(void)
3472iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3473 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3474{
3475 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3476 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3477 pEntry->GuestRegShadowing.uUnused = 0;
3478 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3479 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3480 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3481}
3482
3483
3484# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3485/**
3486 * Debug Info: Record info about guest SIMD register shadowing.
3487 */
3488DECL_HIDDEN_THROW(void)
3489iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3490 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3491{
3492 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3493 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3494 pEntry->GuestSimdRegShadowing.uUnused = 0;
3495 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3496 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3497 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3498}
3499# endif
3500
3501
3502# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3503/**
3504 * Debug Info: Record info about delayed RIP updates.
3505 */
3506DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3507{
3508 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3509 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3510 pEntry->DelayedPcUpdate.offPc = offPc;
3511 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3512}
3513# endif
3514
3515#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3516
3517
3518/*********************************************************************************************************************************
3519* Register Allocator *
3520*********************************************************************************************************************************/
3521
3522/**
3523 * Register parameter indexes (indexed by argument number).
3524 */
3525DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3526{
3527 IEMNATIVE_CALL_ARG0_GREG,
3528 IEMNATIVE_CALL_ARG1_GREG,
3529 IEMNATIVE_CALL_ARG2_GREG,
3530 IEMNATIVE_CALL_ARG3_GREG,
3531#if defined(IEMNATIVE_CALL_ARG4_GREG)
3532 IEMNATIVE_CALL_ARG4_GREG,
3533# if defined(IEMNATIVE_CALL_ARG5_GREG)
3534 IEMNATIVE_CALL_ARG5_GREG,
3535# if defined(IEMNATIVE_CALL_ARG6_GREG)
3536 IEMNATIVE_CALL_ARG6_GREG,
3537# if defined(IEMNATIVE_CALL_ARG7_GREG)
3538 IEMNATIVE_CALL_ARG7_GREG,
3539# endif
3540# endif
3541# endif
3542#endif
3543};
3544AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3545
3546/**
3547 * Call register masks indexed by argument count.
3548 */
3549DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3550{
3551 0,
3552 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3553 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3554 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3555 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3556 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3557#if defined(IEMNATIVE_CALL_ARG4_GREG)
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3559 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3560# if defined(IEMNATIVE_CALL_ARG5_GREG)
3561 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3562 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3563# if defined(IEMNATIVE_CALL_ARG6_GREG)
3564 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3565 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3567# if defined(IEMNATIVE_CALL_ARG7_GREG)
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3570 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3571# endif
3572# endif
3573# endif
3574#endif
3575};
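
/* Note: g_afIemNativeCallRegs[n] is simply the union of the first n entries of
   g_aidxIemNativeCallRegs, which makes it handy for checking or freeing up all the argument
   registers of an n-argument call in one operation (see iemNativeRegAllocArgs below). */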
3576
3577#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3578/**
3579 * BP offset of the stack argument slots.
3580 *
3581 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3582 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3583 */
3584DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3585{
3586 IEMNATIVE_FP_OFF_STACK_ARG0,
3587# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3588 IEMNATIVE_FP_OFF_STACK_ARG1,
3589# endif
3590# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3591 IEMNATIVE_FP_OFF_STACK_ARG2,
3592# endif
3593# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3594 IEMNATIVE_FP_OFF_STACK_ARG3,
3595# endif
3596};
3597AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3598#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3599
3600/**
3601 * Info about shadowed guest register values.
3602 * @see IEMNATIVEGSTREG
3603 */
3604DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3605{
3606#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3607 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3608 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3609 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3610 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3611 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3612 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3613 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3614 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3615 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3616 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3617 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3618 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3623 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3624 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3625 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3626 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3627 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3628 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3629 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3630 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3631 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3632 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3633 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3634 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3635 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3636 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3637 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3638 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3639 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3640 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3641 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3642 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3643 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3644 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3645 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3646 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3647 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3648 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3649 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3650 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3651 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3652 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3653 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3654 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3655#undef CPUMCTX_OFF_AND_SIZE
3656};
3657AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
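
/* Note: the table above must be kept in the exact order of the IEMNATIVEGSTREG enumeration; the
   bracketed index comments and the AssertCompile on the element count help keep them in sync. */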
3658
3659
3660/** Host CPU general purpose register names. */
3661DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3662{
3663#ifdef RT_ARCH_AMD64
3664 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3665#elif defined(RT_ARCH_ARM64)
3666 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3667 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3668#else
3669# error "port me"
3670#endif
3671};
3672
3673
3674#if 0 /* unused */
3675/**
3676 * Tries to locate a suitable register in the given register mask.
3677 *
3678 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3679 * failed.
3680 *
3681 * @returns Host register number on success, returns UINT8_MAX on failure.
3682 */
3683static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3684{
3685 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3686 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3687 if (fRegs)
3688 {
3689 /** @todo pick better here: */
3690 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3691
3692 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3693 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3694 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3695 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3696
3697 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3698 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3699 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3700 return idxReg;
3701 }
3702 return UINT8_MAX;
3703}
3704#endif /* unused */
3705
3706
3707/**
3708 * Locate a register, possibly freeing one up.
3709 *
3710 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3711 * failed.
3712 *
3713 * @returns Host register number on success.  Returns UINT8_MAX if no register
3714 *          was found; the caller is supposed to deal with this and raise an
3715 *          allocation-type specific status code (if desired).
3716 *
3717 * @throws  VBox status code if we run into trouble spilling a variable or
3718 *          recording debug info.  Does NOT throw anything if we're out of
3719 *          registers, though.
3720 */
3721static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3722 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3723{
3724 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3725 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3726 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3727
3728 /*
3729 * Try a freed register that's shadowing a guest register.
3730 */
3731 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3732 if (fRegs)
3733 {
3734 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3735
3736#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3737 /*
3738         * When we have liveness information, we use it to kick out all shadowed
3739         * guest registers that will not be needed any more in this TB.  If we're
3740         * lucky, this may prevent us from ending up here again.
3741 *
3742 * Note! We must consider the previous entry here so we don't free
3743 * anything that the current threaded function requires (current
3744 * entry is produced by the next threaded function).
3745 */
3746 uint32_t const idxCurCall = pReNative->idxCurCall;
3747 if (idxCurCall > 0)
3748 {
3749 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3750
3751# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3752 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3753 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3754            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL */
3755# else
3756            /* Construct a mask of the registers not in the read or write state.
3757               Note! We could skip writes, if they aren't from us, as this is just
3758                     a hack to prevent trashing registers that have just been written
3759                     or will be written when we retire the current instruction. */
3760 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3761 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3762 & IEMLIVENESSBIT_MASK;
3763# endif
3764 /* Merge EFLAGS. */
3765 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3766 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3767 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3768 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3769 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
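            /* I.e. the EFlags position only survives in fToFreeMask if every individual eflags
               liveness tracker (cf, pf, af, zf, sf, of and 'other') says the value is unused. */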
3770
3771 /* If it matches any shadowed registers. */
3772 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3773 {
3774 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3775 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3776 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3777
3778 /* See if we've got any unshadowed registers we can return now. */
3779 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3780 if (fUnshadowedRegs)
3781 {
3782 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3783 return (fPreferVolatile
3784 ? ASMBitFirstSetU32(fUnshadowedRegs)
3785 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3786 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3787 - 1;
3788 }
3789 }
3790 }
3791#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3792
3793 unsigned const idxReg = (fPreferVolatile
3794 ? ASMBitFirstSetU32(fRegs)
3795 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3796 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3797 - 1;
3798
3799 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3800 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3801 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3802 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3803
3804 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3805 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3806 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3807 return idxReg;
3808 }
3809
3810 /*
3811 * Try free up a variable that's in a register.
3812 *
3813     * We do two rounds here: first we evacuate variables that don't need to be
3814     * saved on the stack, then in the second round we move things to the stack.
3815 */
3816 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3817 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3818 {
3819 uint32_t fVars = pReNative->Core.bmVars;
3820 while (fVars)
3821 {
3822 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3823 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3824 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3825 && (RT_BIT_32(idxReg) & fRegMask)
3826 && ( iLoop == 0
3827 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3828 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3829 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3830 {
3831 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3832 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3833 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3834 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3835 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3836 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3837
3838 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3839 {
3840 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3841 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3842 }
3843
3844 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3845 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3846
3847 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3848 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3849 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3850 return idxReg;
3851 }
3852 fVars &= ~RT_BIT_32(idxVar);
3853 }
3854 }
3855
3856 return UINT8_MAX;
3857}
3858
3859
3860/**
3861 * Reassigns a variable to a different register specified by the caller.
3862 *
3863 * @returns The new code buffer position.
3864 * @param pReNative The native recompile state.
3865 * @param off The current code buffer position.
3866 * @param idxVar The variable index.
3867 * @param idxRegOld The old host register number.
3868 * @param idxRegNew The new host register number.
3869 * @param pszCaller The caller for logging.
3870 */
3871static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3872 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3873{
3874 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3875 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3876 RT_NOREF(pszCaller);
3877
3878 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3879
3880 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3881 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3882 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3883 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3884
3885 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3886 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3887 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3888 if (fGstRegShadows)
3889 {
3890 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3891 | RT_BIT_32(idxRegNew);
3892 while (fGstRegShadows)
3893 {
3894 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3895 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3896
3897 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3898 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3899 }
3900 }
3901
3902 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3903 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3904 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3905 return off;
3906}
3907
3908
3909/**
3910 * Moves a variable to a different register or spills it onto the stack.
3911 *
3912 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3913 * kinds can easily be recreated if needed later.
3914 *
3915 * @returns The new code buffer position.
3916 * @param pReNative The native recompile state.
3917 * @param off The current code buffer position.
3918 * @param idxVar The variable index.
3919 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3920 * call-volatile registers.
3921 */
3922DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3923 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3924{
3925 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3926 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3927 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3928 Assert(!pVar->fRegAcquired);
3929
3930 uint8_t const idxRegOld = pVar->idxReg;
3931 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3932 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3933 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3934 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3935 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3936 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3937 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3938 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3939
3940
3941 /** @todo Add statistics on this.*/
3942 /** @todo Implement basic variable liveness analysis (python) so variables
3943     * can be freed immediately once no longer used.  Without such analysis we
3944     * risk trashing registers and stack space on dead variables.
3945 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3946
3947 /*
3948 * First try move it to a different register, as that's cheaper.
3949 */
3950 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3951 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3952 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3953 if (fRegs)
3954 {
3955 /* Avoid using shadow registers, if possible. */
3956 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3957 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3958 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3959 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3960 }
3961
3962 /*
3963 * Otherwise we must spill the register onto the stack.
3964 */
3965 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3966 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3967 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3968 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3969
3970 pVar->idxReg = UINT8_MAX;
3971 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3972 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3973 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3974 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3975 return off;
3976}
3977
3978
3979/**
3980 * Allocates a temporary host general purpose register.
3981 *
3982 * This may emit code to save register content onto the stack in order to free
3983 * up a register.
3984 *
3985 * @returns The host register number; throws VBox status code on failure,
3986 * so no need to check the return value.
3987 * @param pReNative The native recompile state.
3988 * @param poff Pointer to the variable with the code buffer position.
3989 * This will be update if we need to move a variable from
3990 *                          This will be updated if we need to move a variable from
3991 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3992 * registers (@c true, default) or the other way around
3993 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3994 */
3995DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3996{
3997 /*
3998 * Try find a completely unused register, preferably a call-volatile one.
3999 */
4000 uint8_t idxReg;
4001 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4002 & ~pReNative->Core.bmHstRegsWithGstShadow
4003 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4004 if (fRegs)
4005 {
4006 if (fPreferVolatile)
4007 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4008 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4009 else
4010 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4011 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4012 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4013 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4014 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4015 }
4016 else
4017 {
4018 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4019 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4020 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4021 }
4022 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4023}
4024
4025
4026/**
4027 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4028 * registers.
4029 *
4030 * @returns The host register number; throws VBox status code on failure,
4031 * so no need to check the return value.
4032 * @param pReNative The native recompile state.
4033 * @param poff Pointer to the variable with the code buffer position.
4034 *                          This will be updated if we need to move a variable from
4035 * register to stack in order to satisfy the request.
4036 * @param fRegMask Mask of acceptable registers.
4037 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4038 * registers (@c true, default) or the other way around
4039 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4040 */
4041DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4042 bool fPreferVolatile /*= true*/)
4043{
4044 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4045 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4046
4047 /*
4048 * Try find a completely unused register, preferably a call-volatile one.
4049 */
4050 uint8_t idxReg;
4051 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4052 & ~pReNative->Core.bmHstRegsWithGstShadow
4053 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4054 & fRegMask;
4055 if (fRegs)
4056 {
4057 if (fPreferVolatile)
4058 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4059 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4060 else
4061 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4062 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4063 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4064 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4065 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4066 }
4067 else
4068 {
4069 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4070 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4071 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4072 }
4073 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4074}
4075
4076
4077/**
4078 * Allocates a temporary register for loading an immediate value into.
4079 *
4080 * This will emit code to load the immediate, unless there happens to be an
4081 * unused register with the value already loaded.
4082 *
4083 * The caller will not modify the returned register; it must be considered
4084 * read-only. Free using iemNativeRegFreeTmpImm.
4085 *
4086 * @returns The host register number; throws VBox status code on failure, so no
4087 * need to check the return value.
4088 * @param pReNative The native recompile state.
4089 * @param poff Pointer to the variable with the code buffer position.
4090 * @param uImm The immediate value that the register must hold upon
4091 * return.
4092 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4093 * registers (@c true, default) or the other way around
4094 * (@c false).
4095 *
4096 * @note Reusing immediate values has not been implemented yet.
4097 */
4098DECL_HIDDEN_THROW(uint8_t)
4099iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4100{
4101 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4102 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4103 return idxReg;
4104}
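
/* A minimal usage sketch (illustrative only; the surrounding emitter code and the immediate value
   are placeholders, and the exact signature of the free routine is assumed here):

        uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1234));
        ... emit code that reads idxRegImm ...
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

   iemNativeRegFreeTmpImm is the release routine named in the documentation above. */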
4105
4106
4107/**
4108 * Allocates a temporary host general purpose register for keeping a guest
4109 * register value.
4110 *
4111 * Since we may already have a register holding the guest register value,
4112 * code will be emitted to do the loading if that's not the case. Code may also
4113 * be emitted if we have to free up a register to satify the request.
4114 * be emitted if we have to free up a register to satisfy the request.
4115 * @returns The host register number; throws VBox status code on failure, so no
4116 * need to check the return value.
4117 * @param pReNative The native recompile state.
4118 * @param poff Pointer to the variable with the code buffer
4119 *                          position. This will be updated if we need to move a
4120 * variable from register to stack in order to satisfy
4121 * the request.
4122 * @param   enmGstReg       The guest register that is to be updated.
4123 * @param enmIntendedUse How the caller will be using the host register.
4124 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4125 * register is okay (default). The ASSUMPTION here is
4126 * that the caller has already flushed all volatile
4127 * registers, so this is only applied if we allocate a
4128 * new register.
4129 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4130 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4131 */
4132DECL_HIDDEN_THROW(uint8_t)
4133iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4134 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4135 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4136{
4137 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4138#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4139 AssertMsg( fSkipLivenessAssert
4140 || pReNative->idxCurCall == 0
4141 || enmGstReg == kIemNativeGstReg_Pc
4142 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4143 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4144 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4145 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4146 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4147 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4148#endif
4149 RT_NOREF(fSkipLivenessAssert);
4150#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4151 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4152#endif
4153 uint32_t const fRegMask = !fNoVolatileRegs
4154 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4155 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4156
4157 /*
4158 * First check if the guest register value is already in a host register.
4159 */
4160 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4161 {
4162 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4163 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4164 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4165 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4166
4167 /* It's not supposed to be allocated... */
4168 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4169 {
4170 /*
4171 * If the register will trash the guest shadow copy, try find a
4172 * completely unused register we can use instead. If that fails,
4173 * we need to disassociate the host reg from the guest reg.
4174 */
4175 /** @todo would be nice to know if preserving the register is in any way helpful. */
4176 /* If the purpose is calculations, try duplicate the register value as
4177 we'll be clobbering the shadow. */
4178 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4179 && ( ~pReNative->Core.bmHstRegs
4180 & ~pReNative->Core.bmHstRegsWithGstShadow
4181 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4182 {
4183 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4184
4185 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4186
4187 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4188 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4189 g_apszIemNativeHstRegNames[idxRegNew]));
4190 idxReg = idxRegNew;
4191 }
4192 /* If the current register matches the restrictions, go ahead and allocate
4193 it for the caller. */
4194 else if (fRegMask & RT_BIT_32(idxReg))
4195 {
4196 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4197 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4198 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4199 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4200 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4201 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4202 else
4203 {
4204 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4205 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4206 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4207 }
4208 }
4209 /* Otherwise, allocate a register that satisfies the caller and transfer
4210 the shadowing if compatible with the intended use. (This basically
4211 means the call wants a non-volatile register (RSP push/pop scenario).) */
4212               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4213 {
4214 Assert(fNoVolatileRegs);
4215 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4216 !fNoVolatileRegs
4217 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4218 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4219 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4220 {
4221 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4222                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4223 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4224 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4225 }
4226 else
4227 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4228 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4229 g_apszIemNativeHstRegNames[idxRegNew]));
4230 idxReg = idxRegNew;
4231 }
4232 }
4233 else
4234 {
4235 /*
4236 * Oops. Shadowed guest register already allocated!
4237 *
4238 * Allocate a new register, copy the value and, if updating, the
4239 * guest shadow copy assignment to the new register.
4240 */
4241 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4242 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4243 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4244 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4245
4246 /** @todo share register for readonly access. */
4247 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4248 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4249
4250 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4251 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4252
4253 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4254 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4255 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4256 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4257 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4258 else
4259 {
4260 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4261 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4262 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4263 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4264 }
4265 idxReg = idxRegNew;
4266 }
4267 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4268
4269#ifdef VBOX_STRICT
4270 /* Strict builds: Check that the value is correct. */
4271 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4272#endif
4273
4274 return idxReg;
4275 }
4276
4277 /*
4278     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4279 */
4280 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4281
4282 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4283 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4284
4285 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4286 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4287 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4288 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4289
4290 return idxRegNew;
4291}
4292
4293
4294/**
4295 * Allocates a temporary host general purpose register that already holds the
4296 * given guest register value.
4297 *
4298 * The use case for this function is places where the shadowing state cannot be
4299 * modified due to branching and such. This will fail if the we don't have a
4300 * modified due to branching and such.  This will fail if we don't have a
4301 * be emitted here is value checking code in strict builds.
4302 *
4303 * The intended use can only be readonly!
4304 *
4305 * @returns The host register number, UINT8_MAX if not present.
4306 * @param pReNative The native recompile state.
4307 * @param poff Pointer to the instruction buffer offset.
4308 * Will be updated in strict builds if a register is
4309 * found.
4310 * @param   enmGstReg   The guest register that is to be read.
4311 * @note In strict builds, this may throw instruction buffer growth failures.
4312 * Non-strict builds will not throw anything.
4313 * @sa iemNativeRegAllocTmpForGuestReg
4314 */
4315DECL_HIDDEN_THROW(uint8_t)
4316iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4317{
4318 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4320 AssertMsg( pReNative->idxCurCall == 0
4321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4322 || enmGstReg == kIemNativeGstReg_Pc,
4323 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4324#endif
4325
4326 /*
4327 * First check if the guest register value is already in a host register.
4328 */
4329 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4330 {
4331 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4332 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4333 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4334 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4335
4336 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4337 {
4338 /*
4339 * We only do readonly use here, so easy compared to the other
4340 * variant of this code.
4341 */
4342 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4343 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4344 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4345 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4346 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4347
4348#ifdef VBOX_STRICT
4349 /* Strict builds: Check that the value is correct. */
4350 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4351#else
4352 RT_NOREF(poff);
4353#endif
4354 return idxReg;
4355 }
4356 }
4357
4358 return UINT8_MAX;
4359}
4360
4361
4362/**
4363 * Allocates argument registers for a function call.
4364 *
4365 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4366 * need to check the return value.
4367 * @param pReNative The native recompile state.
4368 * @param off The current code buffer offset.
4369 * @param cArgs The number of arguments the function call takes.
4370 */
4371DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4372{
4373 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4374 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4375 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4376 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4377
4378 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4379 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4380 else if (cArgs == 0)
4381 return true;
4382
4383 /*
4384 * Are we lucky and all the registers are free and not shadowing anything?
4385 */
4386 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4387 for (uint32_t i = 0; i < cArgs; i++)
4388 {
4389 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4390 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4391 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4392 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4393 }
4394 /*
4395 * Okay, not lucky so we have to free up the registers.
4396 */
4397 else
4398 for (uint32_t i = 0; i < cArgs; i++)
4399 {
4400 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4401 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4402 {
4403 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4404 {
4405 case kIemNativeWhat_Var:
4406 {
4407 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4408 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4409 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4410 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4411 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4412
4413 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4414 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4415 else
4416 {
4417 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4418 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4419 }
4420 break;
4421 }
4422
4423 case kIemNativeWhat_Tmp:
4424 case kIemNativeWhat_Arg:
4425 case kIemNativeWhat_rc:
4426 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4427 default:
4428 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4429 }
4430
4431 }
4432 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4433 {
4434 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4435 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4436 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4437 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4438 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4439 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4440 }
4441 else
4442 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4443 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4444 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4445 }
4446 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4447 return true;
4448}
4449
4450
4451DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4452
4453
4454#if 0
4455/**
4456 * Frees a register assignment of any type.
4457 *
4458 * @param pReNative The native recompile state.
4459 * @param idxHstReg The register to free.
4460 *
4461 * @note Does not update variables.
4462 */
4463DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4464{
4465 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4466 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4467 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4468 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4469 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4470 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4471 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4472 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4473 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4474 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4475 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4476 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4477 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4478 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4479
4480 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4481 /* no flushing, right:
4482 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4483 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4484 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4485 */
4486}
4487#endif
4488
4489
4490/**
4491 * Frees a temporary register.
4492 *
4493 * Any shadow copies of guest registers assigned to the host register will not
4494 * be flushed by this operation.
4495 */
4496DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4497{
4498 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4499 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4500 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4501 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4502 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4503}
4504
4505
4506/**
4507 * Frees a temporary immediate register.
4508 *
4509 * It is assumed that the caller has not modified the register, so it still holds
4510 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4511 */
4512DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4513{
4514 iemNativeRegFreeTmp(pReNative, idxHstReg);
4515}
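
#if 0 /* Illustrative sketch, not part of the build: the alloc/free pairing this helper
         is meant for. The exact iemNativeRegAllocTmpImm() signature is assumed here
         (state, code buffer offset, immediate value). */
{
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... use idxRegImm without modifying it, so the allocator may keep the value cached ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
}
#endif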
4516
4517
4518/**
4519 * Frees a register assigned to a variable.
4520 *
4521 * The register will be disassociated from the variable.
4522 */
4523DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4524{
4525 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4526 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4527 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4528 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4529 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4530
4531 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4532 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4533 if (!fFlushShadows)
4534 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4535 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4536 else
4537 {
4538 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4539 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4540 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4541 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4542 uint64_t fGstRegShadows = fGstRegShadowsOld;
4543 while (fGstRegShadows)
4544 {
4545 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4546 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4547
4548 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4549 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4550 }
4551 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4552 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4553 }
4554}
4555
4556
4557/**
4558 * Called right before emitting a call instruction to move anything important
4559 * out of call-volatile registers, free and flush the call-volatile registers,
4560 * optionally freeing argument variables.
4561 *
4562 * @returns New code buffer offset, UINT32_MAX on failure.
4563 * @param pReNative The native recompile state.
4564 * @param off The code buffer offset.
4565 * @param cArgs The number of arguments the function call takes.
4566 * It is presumed that the ones passed in host registers have
4567 * already been allocated as such and won't need moving,
4568 * just freeing.
4569 * @param fKeepVars Mask of variables that should keep their register
4570 * assignments. Caller must take care to handle these.
4571 */
4572DECL_HIDDEN_THROW(uint32_t)
4573iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4574{
4575 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4576
4577 /* fKeepVars will reduce this mask. */
4578 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4579
4580 /*
4581 * Move anything important out of volatile registers.
4582 */
4583 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4584 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4585 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4586#ifdef IEMNATIVE_REG_FIXED_TMP0
4587 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4588#endif
4589#ifdef IEMNATIVE_REG_FIXED_TMP1
4590 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4591#endif
4592#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4593 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4594#endif
4595 & ~g_afIemNativeCallRegs[cArgs];
4596
4597 fRegsToMove &= pReNative->Core.bmHstRegs;
4598 if (!fRegsToMove)
4599 { /* likely */ }
4600 else
4601 {
4602 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4603 while (fRegsToMove != 0)
4604 {
4605 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4606 fRegsToMove &= ~RT_BIT_32(idxReg);
4607
4608 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4609 {
4610 case kIemNativeWhat_Var:
4611 {
4612 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4613 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4614 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4615 Assert(pVar->idxReg == idxReg);
4616 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4617 {
4618 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4619 idxVar, pVar->enmKind, pVar->idxReg));
4620 if (pVar->enmKind != kIemNativeVarKind_Stack)
4621 pVar->idxReg = UINT8_MAX;
4622 else
4623 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4624 }
4625 else
4626 fRegsToFree &= ~RT_BIT_32(idxReg);
4627 continue;
4628 }
4629
4630 case kIemNativeWhat_Arg:
4631 AssertMsgFailed(("What?!?: %u\n", idxReg));
4632 continue;
4633
4634 case kIemNativeWhat_rc:
4635 case kIemNativeWhat_Tmp:
4636 AssertMsgFailed(("Missing free: %u\n", idxReg));
4637 continue;
4638
4639 case kIemNativeWhat_FixedTmp:
4640 case kIemNativeWhat_pVCpuFixed:
4641 case kIemNativeWhat_pCtxFixed:
4642 case kIemNativeWhat_PcShadow:
4643 case kIemNativeWhat_FixedReserved:
4644 case kIemNativeWhat_Invalid:
4645 case kIemNativeWhat_End:
4646 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4647 }
4648 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4649 }
4650 }
4651
4652 /*
4653 * Do the actual freeing.
4654 */
4655 if (pReNative->Core.bmHstRegs & fRegsToFree)
4656 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4657 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4658 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4659
4660 /* If there are guest register shadows in any call-volatile register, we
4661 have to clear the corresponding guest register masks for each register. */
4662 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4663 if (fHstRegsWithGstShadow)
4664 {
4665 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4666 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4667 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4668 do
4669 {
4670 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4671 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4672
4673 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4674 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4675 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4676 } while (fHstRegsWithGstShadow != 0);
4677 }
4678
4679 return off;
4680}
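
#if 0 /* Illustrative call-site sketch, not part of the build: the rough sequence around
         emitting a helper call. Argument loading and the actual call emission are
         elided; flushing all guest shadows afterwards is just one (conservative)
         possibility. */
{
    /* Move/spill anything important out of the call-volatile registers and free them. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4 /*cArgs*/);
    /* ... load the argument registers and emit the actual call here ... */
    /* The helper may have modified guest state, so drop the affected shadow copies. */
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
}
#endif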
4681
4682
4683/**
4684 * Flushes a set of guest register shadow copies.
4685 *
4686 * This is usually done after calling a threaded function or a C-implementation
4687 * of an instruction.
4688 *
4689 * @param pReNative The native recompile state.
4690 * @param fGstRegs Set of guest registers to flush.
4691 */
4692DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4693{
4694 /*
4695 * Reduce the mask by what's currently shadowed
4696 */
4697 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4698 fGstRegs &= bmGstRegShadowsOld;
4699 if (fGstRegs)
4700 {
4701 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4702 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4703 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4704 if (bmGstRegShadowsNew)
4705 {
4706 /*
4707 * Partial.
4708 */
4709 do
4710 {
4711 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4712 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4713 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4714 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4715 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4716
4717 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4718 fGstRegs &= ~fInThisHstReg;
4719 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4720 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4721 if (!fGstRegShadowsNew)
4722 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4723 } while (fGstRegs != 0);
4724 }
4725 else
4726 {
4727 /*
4728 * Clear all.
4729 */
4730 do
4731 {
4732 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4733 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4734 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4735 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4736 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4737
4738 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4739 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4740 } while (fGstRegs != 0);
4741 pReNative->Core.bmHstRegsWithGstShadow = 0;
4742 }
4743 }
4744}
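
#if 0 /* Illustrative sketch, not part of the build: fGstRegs is a bitmap of IEMNATIVEGSTREG
         values, so individual registers are combined via RT_BIT_64 (the enum layout used
         below is an assumption). */
/* Drop the shadow copies of RAX and EFLAGS, e.g. after a helper that may change them: */
iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX) | RT_BIT_64(kIemNativeGstReg_EFlags));
/* Or simply drop everything that is currently shadowed: */
iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
#endif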
4745
4746
4747/**
4748 * Flushes guest register shadow copies held by a set of host registers.
4749 *
4750 * This is used with the TLB lookup code for ensuring that we don't carry on
4751 * with any guest shadows in volatile registers, as these will get corrupted by
4752 * a TLB miss.
4753 *
4754 * @param pReNative The native recompile state.
4755 * @param fHstRegs Set of host registers to flush guest shadows for.
4756 */
4757DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4758{
4759 /*
4760 * Reduce the mask by what's currently shadowed.
4761 */
4762 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4763 fHstRegs &= bmHstRegsWithGstShadowOld;
4764 if (fHstRegs)
4765 {
4766 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4767 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4768 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4769 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4770 if (bmHstRegsWithGstShadowNew)
4771 {
4772 /*
4773 * Partial (likely).
4774 */
4775 uint64_t fGstShadows = 0;
4776 do
4777 {
4778 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4779 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4780 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4781 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4782
4783 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4784 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4785 fHstRegs &= ~RT_BIT_32(idxHstReg);
4786 } while (fHstRegs != 0);
4787 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4788 }
4789 else
4790 {
4791 /*
4792 * Clear all.
4793 */
4794 do
4795 {
4796 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4797 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4798 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4799 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4800
4801 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4802 fHstRegs &= ~RT_BIT_32(idxHstReg);
4803 } while (fHstRegs != 0);
4804 pReNative->Core.bmGstRegShadows = 0;
4805 }
4806 }
4807}
4808
4809
4810/**
4811 * Restores guest shadow copies in volatile registers.
4812 *
4813 * This is used after calling a helper function (think TLB miss) to restore the
4814 * register state of volatile registers.
4815 *
4816 * @param pReNative The native recompile state.
4817 * @param off The code buffer offset.
4818 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4819 * be active (allocated) w/o asserting. Hack.
4820 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4821 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4822 */
4823DECL_HIDDEN_THROW(uint32_t)
4824iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4825{
4826 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4827 if (fHstRegs)
4828 {
4829 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4830 do
4831 {
4832 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4833
4834 /* It's not fatal if a register is active holding a variable that is
4835 shadowing a guest register, ASSUMING all pending guest register
4836 writes were flushed prior to the helper call. However, we'll be
4837 emitting duplicate restores, so it wastes code space. */
4838 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4839 RT_NOREF(fHstRegsActiveShadows);
4840
4841 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4842 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4843 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4844 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4845
4846 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4847 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4848
4849 fHstRegs &= ~RT_BIT_32(idxHstReg);
4850 } while (fHstRegs != 0);
4851 }
4852 return off;
4853}
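
#if 0 /* Illustrative sketch, not part of the build: rough shape of a TLB-miss code path.
         Saving/restoring variable registers is done by the helpers named in the @see
         above; the actual helper call emission is elided. */
{
    /* ... iemNativeVarSaveVolatileRegsPreHlpCall(), emit the TLB-miss helper call,
           iemNativeVarRestoreVolatileRegsPostHlpCall() ... */
    /* Reload the guest shadow copies the helper call clobbered in volatile registers: */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
}
#endif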
4854
4855
4856
4857
4858/*********************************************************************************************************************************
4859* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4860*********************************************************************************************************************************/
4861#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4862
4863/**
4864 * Info about shadowed guest SIMD register values.
4865 * @see IEMNATIVEGSTSIMDREG
4866 */
4867static struct
4868{
4869 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4870 uint32_t offXmm;
4871 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4872 uint32_t offYmm;
4873 /** Name (for logging). */
4874 const char *pszName;
4875} const g_aGstSimdShadowInfo[] =
4876{
4877#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4878 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4879 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4880 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4881 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4882 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4883 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4884 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4885 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4886 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4887 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4888 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4889 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4890 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4891 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4892 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4893 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4894 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4895#undef CPUMCTX_OFF_AND_SIZE
4896};
4897AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4898
4899
4900#ifdef LOG_ENABLED
4901/** Host CPU SIMD register names. */
4902DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4903{
4904#ifdef RT_ARCH_AMD64
4905 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4906#elif RT_ARCH_ARM64
4907 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4908 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4909#else
4910# error "port me"
4911#endif
4912};
4913#endif
4914
4915
4916/**
4917 * Frees a temporary SIMD register.
4918 *
4919 * Any shadow copies of guest registers assigned to the host register will not
4920 * be flushed by this operation.
4921 */
4922DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4923{
4924 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4925 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4926 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4927 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4928 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4929}
4930
4931
4932/**
4933 * Emits code to flush a pending write of the given guest SIMD register, if any, and clears its dirty state.
4934 *
4935 * @returns New code buffer offset.
4936 * @param pReNative The native recompile state.
4937 * @param off Current code buffer position.
4938 * @param enmGstSimdReg The guest SIMD register to flush.
4939 */
4940DECL_HIDDEN_THROW(uint32_t)
4941iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4942{
4943 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4944
4945 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4946 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4947 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4948 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4949
4950 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4951 {
4952 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4953 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4954 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4955 }
4956
4957 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4958 {
4959 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4960 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4961 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4962 }
4963
4964 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4965 return off;
4966}
4967
4968
4969/**
4970 * Locate a register, possibly freeing one up.
4971 *
4972 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4973 * failed.
4974 *
4975 * @returns Host register number on success. Returns UINT8_MAX if no registers
4976 * were found; the caller is supposed to deal with this and raise an
4977 * allocation type specific status code (if desired).
4978 *
4979 * @throws VBox status code if we run into trouble spilling a variable or
4980 * recording debug info. Does NOT throw anything if we're out of
4981 * registers, though.
4982 */
4983static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4984 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4985{
4986 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4987 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4988 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4989
4990 /*
4991 * Try a freed register that's shadowing a guest register.
4992 */
4993 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4994 if (fRegs)
4995 {
4996 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4997
4998#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4999 /*
5000 * When we have liveness information, we use it to kick out all shadowed
5001 * guest registers that will not be needed any more in this TB. If we're
5002 * lucky, this may prevent us from ending up here again.
5003 *
5004 * Note! We must consider the previous entry here so we don't free
5005 * anything that the current threaded function requires (current
5006 * entry is produced by the next threaded function).
5007 */
5008 uint32_t const idxCurCall = pReNative->idxCurCall;
5009 if (idxCurCall > 0)
5010 {
5011 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5012
5013# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5014 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5015 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5016 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5017#else
5018 /* Construct a mask of the registers not in the read or write state.
5019 Note! We could skip writes, if they aren't from us, as this is just
5020 a hack to prevent trashing registers that have just been written
5021 or will be written when we retire the current instruction. */
5022 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5023 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5024 & IEMLIVENESSBIT_MASK;
5025#endif
5026 /* If it matches any shadowed registers. */
5027 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5028 {
5029 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5030 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5031 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5032
5033 /* See if we've got any unshadowed registers we can return now. */
5034 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5035 if (fUnshadowedRegs)
5036 {
5037 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5038 return (fPreferVolatile
5039 ? ASMBitFirstSetU32(fUnshadowedRegs)
5040 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5041 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5042 - 1;
5043 }
5044 }
5045 }
5046#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5047
5048 unsigned const idxReg = (fPreferVolatile
5049 ? ASMBitFirstSetU32(fRegs)
5050 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5051 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5052 - 1;
5053
5054 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5055 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5056 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5057 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5058
5059 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5060 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5061 uint32_t idxGstSimdReg = 0;
5062 do
5063 {
5064 if (fGstRegShadows & 0x1)
5065 {
5066 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5067 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5068 }
5069 idxGstSimdReg++;
5070 fGstRegShadows >>= 1;
5071 } while (fGstRegShadows);
5072
5073 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5074 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5075 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5076 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5077 return idxReg;
5078 }
5079
5080 /*
5081 * Try free up a variable that's in a register.
5082 *
5083 * We do two rounds here: first evacuating variables that don't need to be
5084 * saved on the stack, then in the second round moving things to the stack.
5085 */
5086 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5087 AssertReleaseFailed(); /** @todo No variable support right now. */
5088#if 0
5089 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5090 {
5091 uint32_t fVars = pReNative->Core.bmSimdVars;
5092 while (fVars)
5093 {
5094 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5095 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5096 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5097 && (RT_BIT_32(idxReg) & fRegMask)
5098 && ( iLoop == 0
5099 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5100 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5101 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5102 {
5103 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5104 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5105 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5106 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5107 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5108 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5109
5110 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5111 {
5112 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5113 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5114 }
5115
5116 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5117 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5118
5119 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5120 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5121 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5122 return idxReg;
5123 }
5124 fVars &= ~RT_BIT_32(idxVar);
5125 }
5126 }
5127#endif
5128
5129 AssertFailed();
5130 return UINT8_MAX;
5131}
5132
5133
5134/**
5135 * Flushes a set of guest register shadow copies.
5136 *
5137 * This is usually done after calling a threaded function or a C-implementation
5138 * of an instruction.
5139 *
5140 * @param pReNative The native recompile state.
5141 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5142 */
5143DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5144{
5145 /*
5146 * Reduce the mask by what's currently shadowed
5147 */
5148 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5149 fGstSimdRegs &= bmGstSimdRegShadows;
5150 if (fGstSimdRegs)
5151 {
5152 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5153 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5154 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5155 if (bmGstSimdRegShadowsNew)
5156 {
5157 /*
5158 * Partial.
5159 */
5160 do
5161 {
5162 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5163 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5164 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5165 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5166 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5167 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5168
5169 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5170 fGstSimdRegs &= ~fInThisHstReg;
5171 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5172 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5173 if (!fGstRegShadowsNew)
5174 {
5175 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5176 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5177 }
5178 } while (fGstSimdRegs != 0);
5179 }
5180 else
5181 {
5182 /*
5183 * Clear all.
5184 */
5185 do
5186 {
5187 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5188 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5189 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5190 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5191 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5192 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5193
5194 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5195 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5196 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5197 } while (fGstSimdRegs != 0);
5198 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5199 }
5200 }
5201}
5202
5203
5204/**
5205 * Allocates a temporary host SIMD register.
5206 *
5207 * This may emit code to save register content onto the stack in order to free
5208 * up a register.
5209 *
5210 * @returns The host register number; throws VBox status code on failure,
5211 * so no need to check the return value.
5212 * @param pReNative The native recompile state.
5213 * @param poff Pointer to the variable with the code buffer position.
5214 * This will be updated if we need to move a variable from
5215 * register to stack in order to satisfy the request.
5216 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5217 * registers (@c true, default) or the other way around
5218 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5219 */
5220DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5221{
5222 /*
5223 * Try find a completely unused register, preferably a call-volatile one.
5224 */
5225 uint8_t idxSimdReg;
5226 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5227 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5228 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5229 if (fRegs)
5230 {
5231 if (fPreferVolatile)
5232 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5233 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5234 else
5235 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5236 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5237 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5238 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5239 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5240 }
5241 else
5242 {
5243 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5244 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5245 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5246 }
5247
5248 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5249 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5250}
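
#if 0 /* Illustrative sketch, not part of the build: plain temporary (scratch) SIMD register
         usage; the emitted SIMD operation itself is elided. */
{
    uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
    /* ... emit code using host SIMD register idxSimdTmp as scratch ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
}
#endif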
5251
5252
5253/**
5254 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5255 * registers.
5256 *
5257 * @returns The host register number; throws VBox status code on failure,
5258 * so no need to check the return value.
5259 * @param pReNative The native recompile state.
5260 * @param poff Pointer to the variable with the code buffer position.
5261 * This will be updated if we need to move a variable from
5262 * register to stack in order to satisfy the request.
5263 * @param fRegMask Mask of acceptable registers.
5264 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5265 * registers (@c true, default) or the other way around
5266 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5267 */
5268DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5269 bool fPreferVolatile /*= true*/)
5270{
5271 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5272 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5273
5274 /*
5275 * Try find a completely unused register, preferably a call-volatile one.
5276 */
5277 uint8_t idxSimdReg;
5278 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5279 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5280 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5281 & fRegMask;
5282 if (fRegs)
5283 {
5284 if (fPreferVolatile)
5285 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5286 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5287 else
5288 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5289 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5290 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5291 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5292 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5293 }
5294 else
5295 {
5296 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5297 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5298 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5299 }
5300
5301 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5302 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5303}
5304
5305
5306/**
5307 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5308 *
5309 * @param pReNative The native recompile state.
5310 * @param idxHstSimdReg The host SIMD register to update the state for.
5311 * @param enmLoadSz The load size to set.
5312 */
5313DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5314 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5315{
5316 /* Everything valid already? -> nothing to do. */
5317 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5318 return;
5319
5320 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5321 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5322 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5323 {
5324 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5325 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5326 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5327 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5328 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5329 }
5330}
5331
5332
5333static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5334 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5335{
5336 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5337 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5338 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5339 {
5340# ifdef RT_ARCH_ARM64
5341 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5342 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5343# endif
5344
5345 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5346 {
5347 switch (enmLoadSzDst)
5348 {
5349 case kIemNativeGstSimdRegLdStSz_256:
5350 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5351 break;
5352 case kIemNativeGstSimdRegLdStSz_Low128:
5353 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5354 break;
5355 case kIemNativeGstSimdRegLdStSz_High128:
5356 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5357 break;
5358 default:
5359 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5360 }
5361
5362 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5363 }
5364 }
5365 else
5366 {
5367 /* Complicated stuff where the source is currently missing something, later. */
5368 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5369 }
5370
5371 return off;
5372}
5373
5374
5375/**
5376 * Allocates a temporary host SIMD register for keeping a guest
5377 * SIMD register value.
5378 *
5379 * We may already have a register holding the guest register value; if not,
5380 * code will be emitted to do the loading. Code may also
5381 * be emitted if we have to free up a register to satisfy the request.
5382 *
5383 * @returns The host register number; throws VBox status code on failure, so no
5384 * need to check the return value.
5385 * @param pReNative The native recompile state.
5386 * @param poff Pointer to the variable with the code buffer
5387 * position. This will be updated if we need to move a
5388 * variable from register to stack in order to satisfy
5389 * the request.
5390 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5391 * @param enmIntendedUse How the caller will be using the host register.
5392 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5393 * register is okay (default). The ASSUMPTION here is
5394 * that the caller has already flushed all volatile
5395 * registers, so this is only applied if we allocate a
5396 * new register.
5397 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5398 */
5399DECL_HIDDEN_THROW(uint8_t)
5400iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5401 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5402 bool fNoVolatileRegs /*= false*/)
5403{
5404 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5405#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5406 AssertMsg( pReNative->idxCurCall == 0
5407 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5408 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5409 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5410 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5411 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5412 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5413#endif
5414#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5415 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5416#endif
5417 uint32_t const fRegMask = !fNoVolatileRegs
5418 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5419 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5420
5421 /*
5422 * First check if the guest register value is already in a host register.
5423 */
5424 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5425 {
5426 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5427 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5428 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5429 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5430
5431 /* It's not supposed to be allocated... */
5432 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5433 {
5434 /*
5435 * If the register will trash the guest shadow copy, try find a
5436 * completely unused register we can use instead. If that fails,
5437 * we need to disassociate the host reg from the guest reg.
5438 */
5439 /** @todo would be nice to know if preserving the register is in any way helpful. */
5440 /* If the purpose is calculations, try duplicate the register value as
5441 we'll be clobbering the shadow. */
5442 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5443 && ( ~pReNative->Core.bmHstSimdRegs
5444 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5445 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5446 {
5447 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5448
5449 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5450
5451 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5452 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5453 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5454 idxSimdReg = idxRegNew;
5455 }
5456 /* If the current register matches the restrictions, go ahead and allocate
5457 it for the caller. */
5458 else if (fRegMask & RT_BIT_32(idxSimdReg))
5459 {
5460 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5461 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5462 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5463 {
5464 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5465 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5466 else
5467 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5468 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5469 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5470 }
5471 else
5472 {
5473 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5474 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5475 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5476 }
5477 }
5478 /* Otherwise, allocate a register that satisfies the caller and transfer
5479 the shadowing if compatible with the intended use. (This basically
5480 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5481 else
5482 {
5483 Assert(fNoVolatileRegs);
5484 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5485 !fNoVolatileRegs
5486 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5487 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5488 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5489 {
5490 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5491 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5492 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5493 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5494 }
5495 else
5496 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5497 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5498 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5499 idxSimdReg = idxRegNew;
5500 }
5501 }
5502 else
5503 {
5504 /*
5505 * Oops. Shadowed guest register already allocated!
5506 *
5507 * Allocate a new register, copy the value and, if updating, the
5508 * guest shadow copy assignment to the new register.
5509 */
5510 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5511 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5512 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5513 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5514
5515 /** @todo share register for readonly access. */
5516 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5517 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5518
5519 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5520 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5521 else
5522 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5523
5524 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5525 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5526 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5527 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5528 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5529 else
5530 {
5531 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5532 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5533 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5534 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5535 }
5536 idxSimdReg = idxRegNew;
5537 }
5538 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5539
5540#ifdef VBOX_STRICT
5541 /* Strict builds: Check that the value is correct. */
5542 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5543 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5544#endif
5545
5546 return idxSimdReg;
5547 }
5548
5549 /*
5550 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5551 */
5552 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5553
5554 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5555 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5556 else
5557 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5558
5559 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5560 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5561
5562 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5563 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5564
5565 return idxRegNew;
5566}
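
#if 0 /* Illustrative sketch, not part of the build: fetching guest XMM1 for updating
         (low 128 bits only); dirty tracking of the modified guest register is elided. */
{
    uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                       kIemNativeGstSimdRegLdStSz_Low128,
                                                                       kIemNativeGstRegUse_ForUpdate);
    /* ... emit the SIMD operation modifying the low 128 bits of host register idxSimdReg ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
}
#endif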
5567
5568#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5569
5570
5571
5572/*********************************************************************************************************************************
5573* Code emitters for flushing pending guest register writes and sanity checks *
5574*********************************************************************************************************************************/
5575
5576#ifdef VBOX_STRICT
5577/**
5578 * Does internal register allocator sanity checks.
5579 */
5580DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5581{
5582 /*
5583 * Iterate host registers building a guest shadowing set.
5584 */
5585 uint64_t bmGstRegShadows = 0;
5586 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5587 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5588 while (bmHstRegsWithGstShadow)
5589 {
5590 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5591 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5592 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5593
5594 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5595 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5596 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5597 bmGstRegShadows |= fThisGstRegShadows;
5598 while (fThisGstRegShadows)
5599 {
5600 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5601 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5602 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5603 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5604 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5605 }
5606 }
5607 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5608 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5609 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5610
5611 /*
5612 * Now the other way around, checking the guest to host index array.
5613 */
5614 bmHstRegsWithGstShadow = 0;
5615 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5616 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5617 while (bmGstRegShadows)
5618 {
5619 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5620 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5621 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5622
5623 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5624 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5625 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5626 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5627 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5628 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5629 }
5630 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5631 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5632 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5633}
5634#endif /* VBOX_STRICT */
5635
5636
5637/**
5638 * Flushes any delayed guest register writes.
5639 *
5640 * This must be called prior to calling CImpl functions and any helpers that use
5641 * the guest state (like raising exceptions) and such.
5642 *
5643 * Currently this covers delayed RIP updates (when IEMNATIVE_WITH_DELAYED_PC_UPDATING is defined)
5644 * and dirty SIMD register writebacks; delayed writes for other guest registers are not implemented yet.
5645 */
5646DECL_HIDDEN_THROW(uint32_t)
5647iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5648{
5649#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5650 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5651 off = iemNativeEmitPcWriteback(pReNative, off);
5652#else
5653 RT_NOREF(pReNative, fGstShwExcept);
5654#endif
5655
5656#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5657 /** @todo r=bird: There must be a quicker way to check whether anything needs
5658 * doing here, and only then call the SIMD function to do the flushing. */
5659 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5660 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5661 {
5662 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5663 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5664
5665 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5666 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5667
5668 if ( fFlushShadows
5669 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5670 {
5671 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5672
5673 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5674 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5675 }
5676 }
5677#else
5678 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5679#endif
5680
5681 return off;
5682}
5683
5684
5685#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5686/**
5687 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5688 */
5689DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5690{
5691 Assert(pReNative->Core.offPc);
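    /* Core.offPc is the number of bytes RIP has advanced since the last writeback,
       and Core.cInstrPcUpdateSkipped the number of per-instruction updates that were
       skipped; the latter only feeds the statistics counter further down. */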
5692# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5693 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5694 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5695# endif
5696
5697# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5698 /* Allocate a temporary PC register. */
5699 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5700
5701 /* Perform the addition and store the result. */
5702 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5703 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5704
5705 /* Free but don't flush the PC register. */
5706 iemNativeRegFreeTmp(pReNative, idxPcReg);
5707# else
5708 /* Compare the shadow with the context value, they should match. */
5709 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5710 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5711# endif
5712
5713 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5714 pReNative->Core.offPc = 0;
5715 pReNative->Core.cInstrPcUpdateSkipped = 0;
5716
5717 return off;
5718}
5719#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5720
5721
5722/*********************************************************************************************************************************
5723* Code Emitters (larger snippets) *
5724*********************************************************************************************************************************/
5725
5726/**
5727 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5728 * extending to 64-bit width.
5729 *
5730 * @returns New code buffer offset on success, UINT32_MAX on failure.
5731 * @param pReNative The native recompile state.
5732 * @param off The current code buffer position.
5733 * @param idxHstReg The host register to load the guest register value into.
5734 * @param enmGstReg The guest register to load.
5735 *
5736 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5737 * that is something the caller needs to do if applicable.
5738 */
5739DECL_HIDDEN_THROW(uint32_t)
5740iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5741{
5742 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5743 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5744
5745 switch (g_aGstShadowInfo[enmGstReg].cb)
5746 {
5747 case sizeof(uint64_t):
5748 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5749 case sizeof(uint32_t):
5750 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5751 case sizeof(uint16_t):
5752 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5753#if 0 /* not present in the table. */
5754 case sizeof(uint8_t):
5755 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5756#endif
5757 default:
5758 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5759 }
5760}
5761
5762
5763#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5764/**
5765 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5766 *
5767 * @returns New code buffer offset on success, UINT32_MAX on failure.
5768 * @param pReNative The recompiler state.
5769 * @param off The current code buffer position.
5770 * @param idxHstSimdReg The host register to load the guest register value into.
5771 * @param enmGstSimdReg The guest register to load.
5772 * @param enmLoadSz The load size of the register.
5773 *
5774 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5775 * that is something the caller needs to do if applicable.
5776 */
5777DECL_HIDDEN_THROW(uint32_t)
5778iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5779 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5780{
5781 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5782
5783 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5784 switch (enmLoadSz)
5785 {
5786 case kIemNativeGstSimdRegLdStSz_256:
5787 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5788 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5789 case kIemNativeGstSimdRegLdStSz_Low128:
5790 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5791 case kIemNativeGstSimdRegLdStSz_High128:
5792 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5793 default:
5794 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5795 }
5796}
5797#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5798
5799#ifdef VBOX_STRICT
5800
5801/**
5802 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5803 *
5804 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5805 * Trashes EFLAGS on AMD64.
5806 */
5807DECL_HIDDEN_THROW(uint32_t)
5808iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5809{
5810# ifdef RT_ARCH_AMD64
5811 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5812
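    /*
     * Summary of the sequence built below: rotate the upper half of the
     * register into the low 32 bits, test those 32 bits, trap with int3 if
     * any of them are set, and rotate the register back to its original value.
     */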
5813 /* rol reg64, 32 */
5814 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5815 pbCodeBuf[off++] = 0xc1;
5816 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5817 pbCodeBuf[off++] = 32;
5818
5819 /* test reg32, ffffffffh */
5820 if (idxReg >= 8)
5821 pbCodeBuf[off++] = X86_OP_REX_B;
5822 pbCodeBuf[off++] = 0xf7;
5823 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5824 pbCodeBuf[off++] = 0xff;
5825 pbCodeBuf[off++] = 0xff;
5826 pbCodeBuf[off++] = 0xff;
5827 pbCodeBuf[off++] = 0xff;
5828
5829 /* je/jz +1 */
5830 pbCodeBuf[off++] = 0x74;
5831 pbCodeBuf[off++] = 0x01;
5832
5833 /* int3 */
5834 pbCodeBuf[off++] = 0xcc;
5835
5836 /* rol reg64, 32 */
5837 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5838 pbCodeBuf[off++] = 0xc1;
5839 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5840 pbCodeBuf[off++] = 32;
5841
5842# elif defined(RT_ARCH_ARM64)
5843 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5844 /* lsr tmp0, reg64, #32 */
5845 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5846 /* cbz tmp0, +1 */
5847 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5848 /* brk #0x1100 */
5849 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5850
5851# else
5852# error "Port me!"
5853# endif
5854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5855 return off;
5856}
5857
5858
5859/**
5860 * Emitting code that checks that the content of register @a idxReg is the same
5861 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5862 * instruction if that's not the case.
5863 *
5864 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5865 * Trashes EFLAGS on AMD64.
5866 */
5867DECL_HIDDEN_THROW(uint32_t)
5868iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5869{
5870# ifdef RT_ARCH_AMD64
5871 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5872
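    /*
     * The check below compares the host register directly against the CPUMCTX
     * copy (cmp reg, [pVCpu + offCpumCtx]) and raises int3 on mismatch.  For
     * guest registers narrower than 32 bits it additionally verifies that the
     * unused upper bits of the host register are all zero.
     */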
5873 /* cmp reg, [mem] */
5874 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5875 {
5876 if (idxReg >= 8)
5877 pbCodeBuf[off++] = X86_OP_REX_R;
5878 pbCodeBuf[off++] = 0x38;
5879 }
5880 else
5881 {
5882 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5883 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5884 else
5885 {
5886 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5887 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5888 else
5889 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5890 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5891 if (idxReg >= 8)
5892 pbCodeBuf[off++] = X86_OP_REX_R;
5893 }
5894 pbCodeBuf[off++] = 0x39;
5895 }
5896 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5897
5898 /* je/jz +1 */
5899 pbCodeBuf[off++] = 0x74;
5900 pbCodeBuf[off++] = 0x01;
5901
5902 /* int3 */
5903 pbCodeBuf[off++] = 0xcc;
5904
5905 /* For values smaller than the register size, we must check that the rest
5906 of the register is all zeros. */
5907 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5908 {
5909 /* test reg64, imm32 */
5910 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5911 pbCodeBuf[off++] = 0xf7;
5912 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5913 pbCodeBuf[off++] = 0;
5914 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5915 pbCodeBuf[off++] = 0xff;
5916 pbCodeBuf[off++] = 0xff;
5917
5918 /* je/jz +1 */
5919 pbCodeBuf[off++] = 0x74;
5920 pbCodeBuf[off++] = 0x01;
5921
5922 /* int3 */
5923 pbCodeBuf[off++] = 0xcc;
5924 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5925 }
5926 else
5927 {
5928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5929 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5930 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5931 }
5932
5933# elif defined(RT_ARCH_ARM64)
5934 /* mov TMP0, [gstreg] */
5935 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5936
5937 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5938 /* sub tmp0, tmp0, idxReg */
5939 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5940 /* cbz tmp0, +1 */
5941 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5942 /* brk #0x1000+enmGstReg */
5943 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5944 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5945
5946# else
5947# error "Port me!"
5948# endif
5949 return off;
5950}
5951
5952
5953# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5954/**
5955 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
5956 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5957 * instruction if that's not the case.
5958 *
5959 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5960 * Trashes EFLAGS on AMD64.
5961 */
5962DECL_HIDDEN_THROW(uint32_t)
5963iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5964 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5965{
5966 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5967 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5968 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5969 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5970 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5971 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5972 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5973 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5974 return off;
5975
5976# ifdef RT_ARCH_AMD64
5977 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
5978
5979 /* movdqa vectmp0, idxSimdReg */
5980 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5981
5982 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5983
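    /*
     * Strategy: pcmpeqq sets each 64-bit lane of vectmp0 to all ones where it
     * equals the CPUMCTX copy; each lane is then extracted with pextrq and
     * compared against all ones, hitting int3 on any mismatch.
     */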
5984 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5985 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5986 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
5987 pbCodeBuf[off++] = X86_OP_REX_R;
5988 pbCodeBuf[off++] = 0x0f;
5989 pbCodeBuf[off++] = 0x38;
5990 pbCodeBuf[off++] = 0x29;
5991 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5992
5993 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5994 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5995 pbCodeBuf[off++] = X86_OP_REX_W
5996 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
5997 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5998 pbCodeBuf[off++] = 0x0f;
5999 pbCodeBuf[off++] = 0x3a;
6000 pbCodeBuf[off++] = 0x16;
6001 pbCodeBuf[off++] = 0xeb;
6002 pbCodeBuf[off++] = 0x00;
6003
6004 /* cmp tmp0, 0xffffffffffffffff. */
6005 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6006 pbCodeBuf[off++] = 0x83;
6007 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6008 pbCodeBuf[off++] = 0xff;
6009
6010 /* je/jz +1 */
6011 pbCodeBuf[off++] = 0x74;
6012 pbCodeBuf[off++] = 0x01;
6013
6014 /* int3 */
6015 pbCodeBuf[off++] = 0xcc;
6016
6017 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6018 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6019 pbCodeBuf[off++] = X86_OP_REX_W
6020 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6021 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6022 pbCodeBuf[off++] = 0x0f;
6023 pbCodeBuf[off++] = 0x3a;
6024 pbCodeBuf[off++] = 0x16;
6025 pbCodeBuf[off++] = 0xeb;
6026 pbCodeBuf[off++] = 0x01;
6027
6028 /* cmp tmp0, 0xffffffffffffffff. */
6029 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6030 pbCodeBuf[off++] = 0x83;
6031 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6032 pbCodeBuf[off++] = 0xff;
6033
6034 /* je/jz +1 */
6035 pbCodeBuf[off++] = 0x74;
6036 pbCodeBuf[off++] = 0x01;
6037
6038 /* int3 */
6039 pbCodeBuf[off++] = 0xcc;
6040
6041# elif defined(RT_ARCH_ARM64)
6042 /* mov vectmp0, [gstreg] */
6043 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6044
6045 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6046 {
6047 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6048 /* eor vectmp0, vectmp0, idxSimdReg */
6049 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6050 /* cnt vectmp0, vectmp0, #0*/
6051 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6052 /* umov tmp0, vectmp0.D[0] */
6053 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6054 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6055 /* cbz tmp0, +1 */
6056 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6057 /* brk #0x1000+enmGstReg */
6058 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6059 }
6060
6061 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6062 {
6063 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6064 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6065 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6066 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6067 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6068 /* umov tmp0, (vectmp0 + 1).D[0] */
6069 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6070 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6071 /* cbz tmp0, +1 */
6072 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6073 /* brk #0x1000+enmGstReg */
6074 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6075 }
6076
6077# else
6078# error "Port me!"
6079# endif
6080
6081 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6082 return off;
6083}
6084# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6085
6086
6087/**
6088 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6089 * important bits.
6090 *
6091 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6092 * Trashes EFLAGS on AMD64.
6093 */
6094DECL_HIDDEN_THROW(uint32_t)
6095iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6096{
6097 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6098 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6099 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6100 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6101
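    /* Trap (int3 / brk #0x2000) below if the masked copies don't compare equal. */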
6102# ifdef RT_ARCH_AMD64
6103 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6104
6105 /* je/jz +1 */
6106 pbCodeBuf[off++] = 0x74;
6107 pbCodeBuf[off++] = 0x01;
6108
6109 /* int3 */
6110 pbCodeBuf[off++] = 0xcc;
6111
6112# elif defined(RT_ARCH_ARM64)
6113 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6114
6115 /* b.eq +1 */
6116 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6117 /* brk #0x2000 */
6118 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6119
6120# else
6121# error "Port me!"
6122# endif
6123 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6124
6125 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6126 return off;
6127}
6128
6129#endif /* VBOX_STRICT */
6130
6131
6132#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6133/**
6134 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6135 */
6136DECL_HIDDEN_THROW(uint32_t)
6137iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6138{
6139 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6140
6141 fEflNeeded &= X86_EFL_STATUS_BITS;
6142 if (fEflNeeded)
6143 {
6144# ifdef RT_ARCH_AMD64
6145 /* test dword [pVCpu + offVCpu], imm32 */
6146 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6147 if (fEflNeeded <= 0xff)
6148 {
6149 pCodeBuf[off++] = 0xf6;
6150 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6151 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6152 }
6153 else
6154 {
6155 pCodeBuf[off++] = 0xf7;
6156 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6157 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6158 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6159 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6160 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6161 }
6162 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6163
6164# else
6165 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6166 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6167 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6168# ifdef RT_ARCH_ARM64
6169 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6170 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6171# else
6172# error "Port me!"
6173# endif
6174 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6175# endif
6176 }
6177 return off;
6178}
6179#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6180
6181
6182/**
6183 * Emits a code for checking the return code of a call and rcPassUp, returning
6184 * from the code if either are non-zero.
6185 */
6186DECL_HIDDEN_THROW(uint32_t)
6187iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6188{
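    /*
     * The idea on both targets: OR the call status register with
     * VMCPU::iem.s.rcPassUp and branch to the NonZeroRetOrPassUp label if the
     * result is non-zero, so the common success case falls straight through.
     */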
6189#ifdef RT_ARCH_AMD64
6190 /*
6191 * AMD64: eax = call status code.
6192 */
6193
6194 /* edx = rcPassUp */
6195 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6196# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6197 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6198# endif
6199
6200 /* edx = eax | rcPassUp */
6201 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6202 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6203 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6204 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6205
6206 /* Jump to non-zero status return path. */
6207 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6208
6209 /* done. */
6210
6211#elif RT_ARCH_ARM64
6212 /*
6213 * ARM64: w0 = call status code.
6214 */
6215# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6216 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6217# endif
6218 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6219
6220 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6221
6222 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6223
6224 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6225 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6226 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6227
6228#else
6229# error "port me"
6230#endif
6231 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6232 RT_NOREF_PV(idxInstr);
6233 return off;
6234}
6235
6236
6237/**
6238 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6239 * raising a \#GP(0) if it isn't.
6240 *
6241 * @returns New code buffer offset, UINT32_MAX on failure.
6242 * @param pReNative The native recompile state.
6243 * @param off The code buffer offset.
6244 * @param idxAddrReg The host register with the address to check.
6245 * @param idxInstr The current instruction.
6246 */
6247DECL_HIDDEN_THROW(uint32_t)
6248iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6249{
6250 /*
6251 * Make sure we don't have any outstanding guest register writes as we may
6252 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6253 */
6254 off = iemNativeRegFlushPendingWrites(pReNative, off);
6255
6256#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6257 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6258#else
6259 RT_NOREF(idxInstr);
6260#endif
6261
6262#ifdef RT_ARCH_AMD64
6263 /*
6264 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6265 * return raisexcpt();
6266 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6267 */
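    /*
     * Why this works: the address is canonical when bits 63:47 are all equal,
     * i.e. the top 32 bits are either 0x00000000..0x00007fff or
     * 0xffff8000..0xffffffff.  Adding 0x8000 (with 32-bit wrap-around) maps
     * both ranges into 0x00000000..0x0000ffff, so a right shift by 16 yields
     * zero exactly for canonical addresses.
     */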
6268 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6269
6270 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6271 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6272 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6273 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6274 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6275
6276 iemNativeRegFreeTmp(pReNative, iTmpReg);
6277
6278#elif defined(RT_ARCH_ARM64)
6279 /*
6280 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6281 * return raisexcpt();
6282 * ----
6283 * mov x1, 0x800000000000
6284 * add x1, x0, x1
6285 * cmp xzr, x1, lsr 48
6286 * b.ne .Lraisexcpt
6287 */
6288 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6289
6290 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6291 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6292 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6293 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6294
6295 iemNativeRegFreeTmp(pReNative, iTmpReg);
6296
6297#else
6298# error "Port me"
6299#endif
6300 return off;
6301}
6302
6303
6304/**
6305 * Emits code to check that the content of @a idxAddrReg is within the limit
6306 * of CS, raising a \#GP(0) if it isn't.
6307 *
6308 * @returns New code buffer offset; throws VBox status code on error.
6309 * @param pReNative The native recompile state.
6310 * @param off The code buffer offset.
6311 * @param idxAddrReg The host register (32-bit) with the address to
6312 * check.
6313 * @param idxInstr The current instruction.
6314 */
6315DECL_HIDDEN_THROW(uint32_t)
6316iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6317 uint8_t idxAddrReg, uint8_t idxInstr)
6318{
6319 /*
6320 * Make sure we don't have any outstanding guest register writes as we may
6321 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6322 */
6323 off = iemNativeRegFlushPendingWrites(pReNative, off);
6324
6325#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6326 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6327#else
6328 RT_NOREF(idxInstr);
6329#endif
6330
6331 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6332 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6333 kIemNativeGstRegUse_ReadOnly);
6334
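    /* if ((uint32_t)idxAddrReg > CS.u32Limit) goto RaiseGp0;  (unsigned compare + ja) */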
6335 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6336 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6337
6338 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6339 return off;
6340}
6341
6342
6343/**
6344 * Emits a call to a CImpl function or something similar.
6345 */
6346DECL_HIDDEN_THROW(uint32_t)
6347iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6348 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6349{
6350 /* Writeback everything. */
6351 off = iemNativeRegFlushPendingWrites(pReNative, off);
6352
6353 /*
6354 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6355 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6356 */
6357 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6358 fGstShwFlush
6359 | RT_BIT_64(kIemNativeGstReg_Pc)
6360 | RT_BIT_64(kIemNativeGstReg_EFlags));
6361 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6362
6363 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6364
6365 /*
6366 * Load the parameters.
6367 */
6368#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6369 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
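    /* The explicit arguments are all shifted up by one register so that the
       first argument register can carry the address of the stack slot that
       receives the VBOXSTRICTRC value; it is loaded back into eax after the
       call further down. */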
6370 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6371 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6372 if (cAddParams > 0)
6373 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6374 if (cAddParams > 1)
6375 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6376 if (cAddParams > 2)
6377 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6378 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6379
6380#else
6381 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6382 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6383 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6384 if (cAddParams > 0)
6385 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6386 if (cAddParams > 1)
6387 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6388 if (cAddParams > 2)
6389# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6390 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6391# else
6392 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6393# endif
6394#endif
6395
6396 /*
6397 * Make the call.
6398 */
6399 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6400
6401#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6402 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6403#endif
6404
6405 /*
6406 * Check the status code.
6407 */
6408 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6409}
6410
6411
6412/**
6413 * Emits a call to a threaded worker function.
6414 */
6415DECL_HIDDEN_THROW(uint32_t)
6416iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6417{
6418 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6419
6420 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6421 off = iemNativeRegFlushPendingWrites(pReNative, off);
6422
6423 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6424 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6425
6426#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6427 /* The threaded function may throw / long jmp, so set current instruction
6428 number if we're counting. */
6429 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6430#endif
6431
6432 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6433
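    /* Only the number of parameters the threaded function actually uses (per
       g_acIemThreadedFunctionUsedArgs) is loaded below, always preceded by
       pVCpu as the first argument. */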
6434#ifdef RT_ARCH_AMD64
6435 /* Load the parameters and emit the call. */
6436# ifdef RT_OS_WINDOWS
6437# ifndef VBOXSTRICTRC_STRICT_ENABLED
6438 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6439 if (cParams > 0)
6440 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6441 if (cParams > 1)
6442 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6443 if (cParams > 2)
6444 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6445# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6446 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6447 if (cParams > 0)
6448 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6449 if (cParams > 1)
6450 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6451 if (cParams > 2)
6452 {
6453 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6454 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6455 }
6456 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6457# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6458# else
6459 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6460 if (cParams > 0)
6461 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6462 if (cParams > 1)
6463 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6464 if (cParams > 2)
6465 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6466# endif
6467
6468 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6469
6470# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6471 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6472# endif
6473
6474#elif RT_ARCH_ARM64
6475 /*
6476 * ARM64:
6477 */
6478 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6479 if (cParams > 0)
6480 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6481 if (cParams > 1)
6482 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6483 if (cParams > 2)
6484 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6485
6486 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6487
6488#else
6489# error "port me"
6490#endif
6491
6492 /*
6493 * Check the status code.
6494 */
6495 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6496
6497 return off;
6498}
6499
6500#ifdef VBOX_WITH_STATISTICS
6501/**
6502 * Emits code to update the thread call statistics.
6503 */
6504DECL_INLINE_THROW(uint32_t)
6505iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6506{
6507 /*
6508 * Update threaded function stats.
6509 */
6510 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6511 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
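    /* The ARM64 path needs two scratch registers for the counter update
       (presumably a load + increment + store sequence), while the AMD64 path
       below gets by without temporaries, likely incrementing the memory
       operand directly. */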
6512# if defined(RT_ARCH_ARM64)
6513 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6514 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6515 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6516 iemNativeRegFreeTmp(pReNative, idxTmp1);
6517 iemNativeRegFreeTmp(pReNative, idxTmp2);
6518# else
6519 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6520# endif
6521 return off;
6522}
6523#endif /* VBOX_WITH_STATISTICS */
6524
6525
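/*
 * Note on the tail-label emitters below: each one only generates code when the
 * corresponding label was actually requested (iemNativeLabelFind() returning
 * UINT32_MAX otherwise).  The pattern is always the same: define the label,
 * call the helper with pVCpu as the sole argument (or just load the status
 * code for the plain return labels) and jump back to the common return
 * sequence.
 */
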
6526/**
6527 * Emits the code at the CheckBranchMiss label.
6528 */
6529static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6530{
6531 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6532 if (idxLabel != UINT32_MAX)
6533 {
6534 iemNativeLabelDefine(pReNative, idxLabel, off);
6535
6536 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6537 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6538 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6539
6540 /* jump back to the return sequence. */
6541 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6542 }
6543 return off;
6544}
6545
6546
6547/**
6548 * Emits the code at the NeedCsLimChecking label.
6549 */
6550static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6551{
6552 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6553 if (idxLabel != UINT32_MAX)
6554 {
6555 iemNativeLabelDefine(pReNative, idxLabel, off);
6556
6557 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6558 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6559 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6560
6561 /* jump back to the return sequence. */
6562 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6563 }
6564 return off;
6565}
6566
6567
6568/**
6569 * Emits the code at the ObsoleteTb label.
6570 */
6571static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6572{
6573 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6574 if (idxLabel != UINT32_MAX)
6575 {
6576 iemNativeLabelDefine(pReNative, idxLabel, off);
6577
6578 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6579 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6580 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6581
6582 /* jump back to the return sequence. */
6583 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6584 }
6585 return off;
6586}
6587
6588
6589/**
6590 * Emits the code at the RaiseGP0 label.
6591 */
6592static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6593{
6594 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6595 if (idxLabel != UINT32_MAX)
6596 {
6597 iemNativeLabelDefine(pReNative, idxLabel, off);
6598
6599 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6600 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6601 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6602
6603 /* jump back to the return sequence. */
6604 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6605 }
6606 return off;
6607}
6608
6609
6610/**
6611 * Emits the code at the RaiseNm label.
6612 */
6613static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6614{
6615 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
6616 if (idxLabel != UINT32_MAX)
6617 {
6618 iemNativeLabelDefine(pReNative, idxLabel, off);
6619
6620 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
6621 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6622 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
6623
6624 /* jump back to the return sequence. */
6625 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6626 }
6627 return off;
6628}
6629
6630
6631/**
6632 * Emits the code at the RaiseUd label.
6633 */
6634static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6635{
6636 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
6637 if (idxLabel != UINT32_MAX)
6638 {
6639 iemNativeLabelDefine(pReNative, idxLabel, off);
6640
6641 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
6642 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6643 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
6644
6645 /* jump back to the return sequence. */
6646 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6647 }
6648 return off;
6649}
6650
6651
6652/**
6653 * Emits the code at the RaiseMf label.
6654 */
6655static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6656{
6657 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
6658 if (idxLabel != UINT32_MAX)
6659 {
6660 iemNativeLabelDefine(pReNative, idxLabel, off);
6661
6662 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
6663 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6664 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
6665
6666 /* jump back to the return sequence. */
6667 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6668 }
6669 return off;
6670}
6671
6672
6673/**
6674 * Emits the code at the RaiseXf label.
6675 */
6676static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6677{
6678 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
6679 if (idxLabel != UINT32_MAX)
6680 {
6681 iemNativeLabelDefine(pReNative, idxLabel, off);
6682
6683 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
6684 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6685 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
6686
6687 /* jump back to the return sequence. */
6688 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6689 }
6690 return off;
6691}
6692
6693
6694/**
6695 * Emits the code at the ReturnWithFlags label (returns
6696 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6697 */
6698static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6699{
6700 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6701 if (idxLabel != UINT32_MAX)
6702 {
6703 iemNativeLabelDefine(pReNative, idxLabel, off);
6704
6705 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6706
6707 /* jump back to the return sequence. */
6708 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6709 }
6710 return off;
6711}
6712
6713
6714/**
6715 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6716 */
6717static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6718{
6719 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6720 if (idxLabel != UINT32_MAX)
6721 {
6722 iemNativeLabelDefine(pReNative, idxLabel, off);
6723
6724 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6725
6726 /* jump back to the return sequence. */
6727 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6728 }
6729 return off;
6730}
6731
6732
6733/**
6734 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6735 */
6736static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6737{
6738 /*
6739 * Generate the rc + rcPassUp fiddling code if needed.
6740 */
6741 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6742 if (idxLabel != UINT32_MAX)
6743 {
6744 iemNativeLabelDefine(pReNative, idxLabel, off);
6745
6746 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6747#ifdef RT_ARCH_AMD64
6748# ifdef RT_OS_WINDOWS
6749# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6750 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6751# endif
6752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6753 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6754# else
6755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6757# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6758 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6759# endif
6760# endif
6761# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6762 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6763# endif
6764
6765#else
6766 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6767 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6768 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6769#endif
6770
6771 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6772 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6773 }
6774 return off;
6775}
6776
6777
6778/**
6779 * Emits a standard epilog.
6780 */
6781static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6782{
6783 *pidxReturnLabel = UINT32_MAX;
6784
6785 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6786 off = iemNativeRegFlushPendingWrites(pReNative, off);
6787
6788 /*
6789 * Successful return, so clear the return register (eax, w0).
6790 */
6791 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6792
6793 /*
6794 * Define label for common return point.
6795 */
6796 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6797 *pidxReturnLabel = idxReturn;
6798
6799 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6800
6801 /*
6802 * Restore registers and return.
6803 */
6804#ifdef RT_ARCH_AMD64
6805 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6806
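    /*
     * This mirrors the prolog: reposition rsp at the last saved register, pop
     * the non-volatile registers in reverse order, then 'leave' restores
     * rbp/rsp before the final ret.  The trailing int3 is never executed and
     * only serves as poison.
     */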
6807 /* Reposition esp at the r15 restore point. */
6808 pbCodeBuf[off++] = X86_OP_REX_W;
6809 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6810 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6811 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6812
6813 /* Pop non-volatile registers and return */
6814 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6815 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6816 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6817 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6818 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6819 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6820 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6821 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6822# ifdef RT_OS_WINDOWS
6823 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6824 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6825# endif
6826 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6827 pbCodeBuf[off++] = 0xc9; /* leave */
6828 pbCodeBuf[off++] = 0xc3; /* ret */
6829 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6830
6831#elif RT_ARCH_ARM64
6832 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6833
6834 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6835 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6836 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6837 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6838 IEMNATIVE_FRAME_VAR_SIZE / 8);
6839 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6840 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6841 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6842 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6843 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6844 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6845 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6846 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6847 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6848 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6849 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6850 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6851
6852 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6853 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6854 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6855 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6856
6857 /* retab / ret */
6858# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6859 if (1)
6860 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6861 else
6862# endif
6863 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6864
6865#else
6866# error "port me"
6867#endif
6868 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6869
6870 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6871}
6872
6873
6874/**
6875 * Emits a standard prolog.
6876 */
6877static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6878{
6879#ifdef RT_ARCH_AMD64
6880 /*
6881 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6882 * reserving 64 bytes for stack variables plus 4 non-register argument
6883 * slots. Fixed register assignment: xBX = pVCpu;
6884 *
6885 * Since we always do the same register spilling, we can use the same
6886 * unwind description for all the code.
6887 */
6888 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6889 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6890 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6891 pbCodeBuf[off++] = 0x8b;
6892 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6893 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6894 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6895# ifdef RT_OS_WINDOWS
6896 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6897 pbCodeBuf[off++] = 0x8b;
6898 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6899 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6900 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6901# else
6902 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6903 pbCodeBuf[off++] = 0x8b;
6904 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6905# endif
6906 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6907 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6908 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6909 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6910 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6911 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6912 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6913 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6914
6915# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6916 /* Save the frame pointer. */
6917 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6918# endif
6919
6920 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6921 X86_GREG_xSP,
6922 IEMNATIVE_FRAME_ALIGN_SIZE
6923 + IEMNATIVE_FRAME_VAR_SIZE
6924 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6925 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6926 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6927 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6928 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6929
6930#elif RT_ARCH_ARM64
6931 /*
6932 * We set up a stack frame exactly like on x86, only we have to push the
6933 * return address ourselves here. We save all non-volatile registers.
6934 */
6935 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6936
6937# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
6938 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6939 * definitely the dwarf stepping code, but until that is tracked down it's very tedious to figure out whether it's
6940 * in any way conditional, so just emit this instruction now and hope for the best... */
6941 /* pacibsp */
6942 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6943# endif
6944
6945 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
6946 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6947 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6948 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6949 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6950 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6951 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6952 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6953 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6954 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6955 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6956 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6957 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6958 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6959 /* Save the BP and LR (ret address) registers at the top of the frame. */
6960 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6961 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6962 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6963 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6964 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6965 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6966
6967 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6968 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6969
6970 /* mov r28, r0 */
6971 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6972 /* mov r27, r1 */
6973 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6974
6975# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6976 /* Save the frame pointer. */
6977 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6978 ARMV8_A64_REG_X2);
6979# endif
6980
6981#else
6982# error "port me"
6983#endif
6984 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6985 return off;
6986}
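/*
 * Editorial sketch, not part of the original source: the resulting ARM64 frame
 * layout, as implied by the stp pair indices above (each 64-bit pair occupies
 * 16 bytes) and the AssertCompile that IEMNATIVE_FRAME_SAVE_REG_SIZE is 12*8 bytes:
 *
 *      SP+0x00: x19, x20       (pre-indexed store that also allocates the 96 byte save area)
 *      SP+0x10: x21, x22
 *      SP+0x20: x23, x24
 *      SP+0x30: x25, x26
 *      SP+0x40: x27, x28
 *      SP+0x50: BP (x29), LR (x30)
 *      BP = SP + IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 = SP + 0x50
 *
 * After that SP is lowered by IEMNATIVE_FRAME_VAR_SIZE for the variable area,
 * mirroring the x86 prologue set up earlier in this function.
 */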
6987
6988
6989/*********************************************************************************************************************************
6990* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6991*********************************************************************************************************************************/
6992
6993/**
6994 * Internal worker that allocates a variable with kind set to
6995 * kIemNativeVarKind_Invalid and no current stack allocation.
6996 *
6997 * The kind will either be set by the caller or later when the variable is first
6998 * assigned a value.
6999 *
7000 * @returns Unpacked index.
7001 * @internal
7002 */
7003static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7004{
7005 Assert(cbType > 0 && cbType <= 64);
7006 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7007 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7008 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7009 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7010 pReNative->Core.aVars[idxVar].cbVar = cbType;
7011 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7012 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7013 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7014 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7015 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7016 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7017 pReNative->Core.aVars[idxVar].u.uValue = 0;
7018 return idxVar;
7019}
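/*
 * Editorial sketch, not part of the original source: how the bitmap scan above
 * picks a free variable index.  ASMBitFirstSetU32 returns the 1-based index of
 * the lowest set bit (0 if none), so scanning the inverted allocation bitmap
 * yields the lowest free index.  The bitmap value below is made up for the example.
 */
#if 0
static uint8_t iemNativeVarAllocIntExample(void)
{
    uint32_t bmVars = RT_BIT_32(0) | RT_BIT_32(1) | RT_BIT_32(2);   /* vars 0..2 already allocated */
    unsigned const idxVar = ASMBitFirstSetU32(~bmVars) - 1;         /* lowest clear bit -> 3 */
    bmVars |= RT_BIT_32(idxVar);                                    /* mark it allocated */
    return (uint8_t)idxVar;
}
#endif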
7020
7021
7022/**
7023 * Internal worker that allocates an argument variable w/o setting enmKind.
7024 *
7025 * @returns Unpacked index.
7026 * @internal
7027 */
7028static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7029{
7030 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7031 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7032 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7033
7034 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7035 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7036 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7037 return idxVar;
7038}
7039
7040
7041/**
7042 * Gets the stack slot for a stack variable, allocating one if necessary.
7043 *
7044 * Calling this function implies that the stack slot will contain a valid
7045 * variable value. The caller deals with any register currently assigned to the
7046 * variable, typically by spilling it into the stack slot.
7047 *
7048 * @returns The stack slot number.
7049 * @param pReNative The recompiler state.
7050 * @param idxVar The variable.
7051 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7052 */
7053DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7054{
7055 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7056 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7057 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7058
7059 /* Already got a slot? */
7060 uint8_t const idxStackSlot = pVar->idxStackSlot;
7061 if (idxStackSlot != UINT8_MAX)
7062 {
7063 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7064 return idxStackSlot;
7065 }
7066
7067 /*
7068 * A single slot is easy to allocate.
7069 * Allocate them from the top end, closest to BP, to reduce the displacement.
7070 */
7071 if (pVar->cbVar <= sizeof(uint64_t))
7072 {
7073 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7074 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7075 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7076 pVar->idxStackSlot = (uint8_t)iSlot;
7077 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7078 return (uint8_t)iSlot;
7079 }
7080
7081 /*
7082 * We need more than one stack slot.
7083 *
7084 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7085 */
7086 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7087 Assert(pVar->cbVar <= 64);
7088 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7089 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7090 uint32_t bmStack = ~pReNative->Core.bmStack;
7091 while (bmStack != UINT32_MAX)
7092 {
7093/** @todo allocate from the top to reduce BP displacement. */
7094 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7095 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7096 if (!(iSlot & fBitAlignMask))
7097 {
7098 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7099 {
7100 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7101 pVar->idxStackSlot = (uint8_t)iSlot;
7102 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7103 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7104 return (uint8_t)iSlot;
7105 }
7106 }
7107 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7108 }
7109 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7110}
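/*
 * Editorial sketch, not part of the original source: the mask arithmetic used
 * by the multi-slot path above, worked for the three supported sizes.  With
 * 8 byte slots, a 16/32/64 byte variable needs 2/4/8 slots and is aligned to
 * that same number of slots:
 *
 *      cbVar   fBitAlignMask   fBitAllocMask
 *        16         0x1             0x3
 *        32         0x3             0xf
 *        64         0x7             0xff
 */
#if 0
static void iemNativeStackMaskExample(uint8_t cbVar)
{
    uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(cbVar) - 4) - 1;  /* e.g. 32 -> (1<<2)-1 = 0x3 */
    uint32_t const fBitAllocMask = RT_BIT_32((cbVar + 7) >> 3) - 1;             /* e.g. 32 -> (1<<4)-1 = 0xf */
    RT_NOREF(fBitAlignMask, fBitAllocMask);
}
#endif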
7111
7112
7113/**
7114 * Changes the variable to a stack variable.
7115 *
7116 * Currently this is only possible to do the first time the variable is used;
7117 * switching later could be implemented but isn't done.
7118 *
7119 * @param pReNative The recompiler state.
7120 * @param idxVar The variable.
7121 * @throws VERR_IEM_VAR_IPE_2
7122 */
7123DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7124{
7125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7126 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7127 if (pVar->enmKind != kIemNativeVarKind_Stack)
7128 {
7129 /* We could in theory transition from immediate to stack as well, but it
7130 would involve the caller doing the work of storing the value on the stack. So,
7131 until that's required we only allow transition from invalid. */
7132 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7133 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7134 pVar->enmKind = kIemNativeVarKind_Stack;
7135
7136 /* Note! We don't allocate a stack slot here, that's only done when a
7137 slot is actually needed to hold a variable value. */
7138 }
7139}
7140
7141
7142/**
7143 * Sets the variable to a constant value.
7144 *
7145 * This does not require stack storage as we know the value and can always
7146 * reload it, unless of course it's referenced.
7147 *
7148 * @param pReNative The recompiler state.
7149 * @param idxVar The variable.
7150 * @param uValue The immediate value.
7151 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7152 */
7153DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7154{
7155 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7156 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7157 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7158 {
7159 /* Only simple transitions for now. */
7160 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7161 pVar->enmKind = kIemNativeVarKind_Immediate;
7162 }
7163 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7164
7165 pVar->u.uValue = uValue;
7166 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7167 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7168 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7169}
7170
7171
7172/**
7173 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7174 *
7175 * This does not require stack storage as we know the value and can always
7176 * reload it. Loading is postponed till needed.
7177 *
7178 * @param pReNative The recompiler state.
7179 * @param idxVar The variable. Unpacked.
7180 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7181 *
7182 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7183 * @internal
7184 */
7185static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7186{
7187 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7188 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7189
7190 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7191 {
7192 /* Only simple transitions for now. */
7193 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7194 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7195 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7196 }
7197 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7198
7199 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7200
7201 /* Update the other variable, ensure it's a stack variable. */
7202 /** @todo handle variables with const values... that'll go boom now. */
7203 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7204 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7205}
7206
7207
7208/**
7209 * Sets the variable to a reference (pointer) to a guest register reference.
7210 *
7211 * This does not require stack storage as we know the value and can always
7212 * reload it. Loading is postponed till needed.
7213 *
7214 * @param pReNative The recompiler state.
7215 * @param idxVar The variable.
7216 * @param enmRegClass The class of guest registers to reference.
7217 * @param idxReg The register within @a enmRegClass to reference.
7218 *
7219 * @throws VERR_IEM_VAR_IPE_2
7220 */
7221DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7222 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7223{
7224 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7225 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7226
7227 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7228 {
7229 /* Only simple transitions for now. */
7230 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7231 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7232 }
7233 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7234
7235 pVar->u.GstRegRef.enmClass = enmRegClass;
7236 pVar->u.GstRegRef.idx = idxReg;
7237}
7238
7239
7240DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7241{
7242 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7243}
7244
7245
7246DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7247{
7248 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7249
7250 /* Since we're using a generic uint64_t value type, we must truncate it if
7251 the variable is smaller, otherwise we may end up with a too large value when
7252 scaling up an imm8 w/ sign-extension.
7253
7254 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7255 in the bios, bx=1) when running on arm, because clang expects 16-bit
7256 register parameters to have bits 16 and up set to zero. Instead of
7257 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7258 CF value in the result. */
7259 switch (cbType)
7260 {
7261 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7262 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7263 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7264 }
7265 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7266 return idxVar;
7267}
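/*
 * Editorial sketch, not part of the original source: what the truncation above
 * does for the "add bx, 0xffff" case mentioned in the comment.  The values are
 * illustrative only.
 */
#if 0
static uint64_t iemNativeArgConstTruncExample(void)
{
    uint64_t      uValue = UINT64_MAX;      /* imm8 -1 sign-extended all the way up to 64 bits */
    uint8_t const cbType = sizeof(uint16_t);
    switch (cbType)
    {
        case sizeof(uint8_t):  uValue &= UINT64_C(0xff); break;
        case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
        case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
    }
    return uValue;                          /* 0xffff, i.e. bits 16 and up are zero as the ABI expects */
}
#endif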
7268
7269
7270DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7271{
7272 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7273 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7274 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7275 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7276 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7277 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7278
7279 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7280 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7281 return idxArgVar;
7282}
7283
7284
7285DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7286{
7287 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7288 /* Don't set to stack now; leave that to the first use, as for instance
7289 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7290 return idxVar;
7291}
7292
7293
7294DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7295{
7296 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7297
7298 /* Since we're using a generic uint64_t value type, we must truncate it if
7299 the variable is smaller, otherwise we may end up with a too large value when
7300 scaling up an imm8 w/ sign-extension. */
7301 switch (cbType)
7302 {
7303 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7304 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7305 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7306 }
7307 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7308 return idxVar;
7309}
7310
7311
7312/**
7313 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7314 * fixed till we call iemNativeVarRegisterRelease.
7315 *
7316 * @returns The host register number.
7317 * @param pReNative The recompiler state.
7318 * @param idxVar The variable.
7319 * @param poff Pointer to the instruction buffer offset.
7320 * In case a register needs to be freed up or the value
7321 * loaded off the stack.
7322 * @param fInitialized Set if the variable must already have been initialized.
7323 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7324 * the case.
7325 * @param idxRegPref Preferred register number or UINT8_MAX.
7326 */
7327DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7328 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7329{
7330 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7331 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7332 Assert(pVar->cbVar <= 8);
7333 Assert(!pVar->fRegAcquired);
7334
7335 uint8_t idxReg = pVar->idxReg;
7336 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7337 {
7338 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7339 && pVar->enmKind < kIemNativeVarKind_End);
7340 pVar->fRegAcquired = true;
7341 return idxReg;
7342 }
7343
7344 /*
7345 * If the kind of variable has not yet been set, default to 'stack'.
7346 */
7347 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7348 && pVar->enmKind < kIemNativeVarKind_End);
7349 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7350 iemNativeVarSetKindToStack(pReNative, idxVar);
7351
7352 /*
7353 * We have to allocate a register for the variable, even if it's a stack one,
7354 * as we don't know if there are modifications being made to it before it's
7355 * finalized (todo: analyze and insert hints about that?).
7356 *
7357 * If we can, we try to get the correct register for argument variables. This
7358 * assumes that most argument variables are fetched as close as possible
7359 * to the actual call, so that there aren't any interfering hidden calls
7360 * (memory accesses, etc.) in between.
7361 *
7362 * If we cannot, or it's an ordinary (non-argument) variable, we make sure no
7363 * argument registers that will be used by this MC block are allocated here,
7364 * and we always prefer non-volatile registers to avoid needing to spill stuff
7365 * for internal calls.
7366 */
7367 /** @todo Detect too early argument value fetches and warn about hidden
7368 * calls causing less optimal code to be generated in the python script. */
7369
7370 uint8_t const uArgNo = pVar->uArgNo;
7371 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7372 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7373 {
7374 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7375 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7376 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7377 }
7378 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7379 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7380 {
7381 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7382 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7383 & ~pReNative->Core.bmHstRegsWithGstShadow
7384 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7385 & fNotArgsMask;
7386 if (fRegs)
7387 {
7388 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7389 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7390 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7391 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7392 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7393 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7394 }
7395 else
7396 {
7397 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7398 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7399 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7400 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7401 }
7402 }
7403 else
7404 {
7405 idxReg = idxRegPref;
7406 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7407 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7408 }
7409 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7410 pVar->idxReg = idxReg;
7411
7412 /*
7413 * Load it off the stack if we've got a stack slot.
7414 */
7415 uint8_t const idxStackSlot = pVar->idxStackSlot;
7416 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7417 {
7418 Assert(fInitialized);
7419 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7420 switch (pVar->cbVar)
7421 {
7422 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7423 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7424 case 3: AssertFailed(); RT_FALL_THRU();
7425 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7426 default: AssertFailed(); RT_FALL_THRU();
7427 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7428 }
7429 }
7430 else
7431 {
7432 Assert(idxStackSlot == UINT8_MAX);
7433 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7434 }
7435 pVar->fRegAcquired = true;
7436 return idxReg;
7437}
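/*
 * Editorial sketch, not part of the original source: the "pick from the top,
 * prefer non-volatile" selection above, shown with made-up mask values (the
 * real IEMNATIVE_CALL_VOLATILE_GREG_MASK is host specific).
 */
#if 0
static uint8_t iemNativeVarRegPickExample(void)
{
    uint32_t const fRegs         = UINT32_C(0x0000f0f0);    /* hypothetical set of free, unshadowed registers */
    uint32_t const fVolatileMask = UINT32_C(0x000000ff);    /* hypothetical volatile register mask */
    /* Prefer a non-volatile register if any is free, and take the highest one
       as both arm64 and amd64 have a block of non-volatile registers at the top. */
    uint32_t const fPick = fRegs & ~fVolatileMask ? fRegs & ~fVolatileMask : fRegs;
    return (uint8_t)(ASMBitLastSetU32(fPick) - 1);           /* -> register 15 in this example */
}
#endif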
7438
7439
7440/**
7441 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7442 * guest register.
7443 *
7444 * This function makes sure there is a register for it and sets it to be the
7445 * current shadow copy of @a enmGstReg.
7446 *
7447 * @returns The host register number.
7448 * @param pReNative The recompiler state.
7449 * @param idxVar The variable.
7450 * @param enmGstReg The guest register this variable will be written to
7451 * after this call.
7452 * @param poff Pointer to the instruction buffer offset.
7453 * In case a register needs to be freed up or if the
7454 * variable content needs to be loaded off the stack.
7455 *
7456 * @note We DO NOT expect @a idxVar to be an argument variable,
7457 * because we can only in the commit stage of an instruction when this
7458 * function is used.
7459 */
7460DECL_HIDDEN_THROW(uint8_t)
7461iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7462{
7463 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7464 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7465 Assert(!pVar->fRegAcquired);
7466 AssertMsgStmt( pVar->cbVar <= 8
7467 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7468 || pVar->enmKind == kIemNativeVarKind_Stack),
7469 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7470 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7471 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7472
7473 /*
7474 * This shouldn't ever be used for arguments, unless it's in a weird else
7475 * branch that doesn't do any calling and even then it's questionable.
7476 *
7477 * However, in case someone writes crazy wrong MC code and does register
7478 * updates before making calls, just use the regular register allocator to
7479 * ensure we get a register suitable for the intended argument number.
7480 */
7481 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7482
7483 /*
7484 * If there is already a register for the variable, we transfer/set the
7485 * guest shadow copy assignment to it.
7486 */
7487 uint8_t idxReg = pVar->idxReg;
7488 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7489 {
7490 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7491 {
7492 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7493 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7494 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7495 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7496 }
7497 else
7498 {
7499 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7500 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7501 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7502 }
7503 /** @todo figure this one out. We need some way of making sure the register isn't
7504 * modified after this point, just in case we start writing crappy MC code. */
7505 pVar->enmGstReg = enmGstReg;
7506 pVar->fRegAcquired = true;
7507 return idxReg;
7508 }
7509 Assert(pVar->uArgNo == UINT8_MAX);
7510
7511 /*
7512 * Because this is supposed to be the commit stage, we just tag along with the
7513 * temporary register allocator and upgrade it to a variable register.
7514 */
7515 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7516 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7517 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7518 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7519 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7520 pVar->idxReg = idxReg;
7521
7522 /*
7523 * Now we need to load the register value.
7524 */
7525 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7526 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7527 else
7528 {
7529 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7530 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7531 switch (pVar->cbVar)
7532 {
7533 case sizeof(uint64_t):
7534 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7535 break;
7536 case sizeof(uint32_t):
7537 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7538 break;
7539 case sizeof(uint16_t):
7540 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7541 break;
7542 case sizeof(uint8_t):
7543 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7544 break;
7545 default:
7546 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7547 }
7548 }
7549
7550 pVar->fRegAcquired = true;
7551 return idxReg;
7552}
7553
7554
7555/**
7556 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7557 *
7558 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7559 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7560 * requirement of flushing anything in volatile host registers when making a
7561 * call.
7562 *
7563 * @returns New @a off value.
7564 * @param pReNative The recompiler state.
7565 * @param off The code buffer position.
7566 * @param fHstRegsNotToSave Set of registers not to save & restore.
7567 */
7568DECL_HIDDEN_THROW(uint32_t)
7569iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7570{
7571 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7572 if (fHstRegs)
7573 {
7574 do
7575 {
7576 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7577 fHstRegs &= ~RT_BIT_32(idxHstReg);
7578
7579 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7580 {
7581 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7582 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7583 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7584 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7585 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7586 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7587 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7588 {
7589 case kIemNativeVarKind_Stack:
7590 {
7591 /* Temporarily spill the variable register. */
7592 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7593 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7594 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7595 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7596 continue;
7597 }
7598
7599 case kIemNativeVarKind_Immediate:
7600 case kIemNativeVarKind_VarRef:
7601 case kIemNativeVarKind_GstRegRef:
7602 /* It is weird to have any of these loaded at this point. */
7603 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7604 continue;
7605
7606 case kIemNativeVarKind_End:
7607 case kIemNativeVarKind_Invalid:
7608 break;
7609 }
7610 AssertFailed();
7611 }
7612 else
7613 {
7614 /*
7615 * Allocate a temporary stack slot and spill the register to it.
7616 */
7617 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7618 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7619 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7620 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7621 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7622 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7623 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7624 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7625 }
7626 } while (fHstRegs);
7627 }
7628 return off;
7629}
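/*
 * Editorial sketch, not part of the original source: the intended pairing of
 * the save/restore helpers around a TLB-miss style helper call.  The actual
 * argument loading and call emission is elided, as the exact helper used
 * depends on the call site.
 */
#if 0
static uint32_t iemNativeVolatileRegSaveRestoreUsageExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint32_t const fSkip = 0;                                                /* nothing to exclude in this example */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fSkip);     /* spill live volatile registers */
    /* ... load arguments and emit the actual helper call here ... */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fSkip); /* reload them afterwards */
    return off;
}
#endif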
7630
7631
7632/**
7633 * Emit code to restore volatile registers after a call to a helper.
7634 *
7635 * @returns New @a off value.
7636 * @param pReNative The recompiler state.
7637 * @param off The code buffer position.
7638 * @param fHstRegsNotToSave Set of registers not to save & restore.
7639 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7640 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7641 */
7642DECL_HIDDEN_THROW(uint32_t)
7643iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7644{
7645 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7646 if (fHstRegs)
7647 {
7648 do
7649 {
7650 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7651 fHstRegs &= ~RT_BIT_32(idxHstReg);
7652
7653 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7654 {
7655 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7656 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7657 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7658 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7659 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7660 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7661 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7662 {
7663 case kIemNativeVarKind_Stack:
7664 {
7665 /* Unspill the variable register. */
7666 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7667 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7668 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7669 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7670 continue;
7671 }
7672
7673 case kIemNativeVarKind_Immediate:
7674 case kIemNativeVarKind_VarRef:
7675 case kIemNativeVarKind_GstRegRef:
7676 /* It is weird to have any of these loaded at this point. */
7677 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7678 continue;
7679
7680 case kIemNativeVarKind_End:
7681 case kIemNativeVarKind_Invalid:
7682 break;
7683 }
7684 AssertFailed();
7685 }
7686 else
7687 {
7688 /*
7689 * Restore from temporary stack slot.
7690 */
7691 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7692 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7693 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7694 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7695
7696 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7697 }
7698 } while (fHstRegs);
7699 }
7700 return off;
7701}
7702
7703
7704/**
7705 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
7706 *
7707 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7708 *
7709 * ASSUMES that @a idxVar is valid and unpacked.
7710 */
7711DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7712{
7713 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7714 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7715 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7716 {
7717 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7718 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7719 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7720 Assert(cSlots > 0);
7721 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7722 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7723 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7724 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7725 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7726 }
7727 else
7728 Assert(idxStackSlot == UINT8_MAX);
7729}
7730
7731
7732/**
7733 * Worker that frees a single variable.
7734 *
7735 * ASSUMES that @a idxVar is valid and unpacked.
7736 */
7737DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7738{
7739 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7740 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7741 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7742
7743 /* Free the host register first if any assigned. */
7744 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7745 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7746 {
7747 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7748 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7749 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7750 }
7751
7752 /* Free argument mapping. */
7753 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7754 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7755 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7756
7757 /* Free the stack slots. */
7758 iemNativeVarFreeStackSlots(pReNative, idxVar);
7759
7760 /* Free the actual variable. */
7761 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7762 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7763}
7764
7765
7766/**
7767 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7768 */
7769DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7770{
7771 while (bmVars != 0)
7772 {
7773 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7774 bmVars &= ~RT_BIT_32(idxVar);
7775
7776#if 1 /** @todo optimize by simplifying this later... */
7777 iemNativeVarFreeOneWorker(pReNative, idxVar);
7778#else
7779 /* Only need to free the host register, the rest is done as bulk updates below. */
7780 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7781 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7782 {
7783 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7784 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7785 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7786 }
7787#endif
7788 }
7789#if 0 /** @todo optimize by simplifying this later... */
7790 pReNative->Core.bmVars = 0;
7791 pReNative->Core.bmStack = 0;
7792 pReNative->Core.u64ArgVars = UINT64_MAX;
7793#endif
7794}
7795
7796
7797
7798/*********************************************************************************************************************************
7799* Emitters for IEM_MC_CALL_CIMPL_XXX *
7800*********************************************************************************************************************************/
7801
7802/**
7803 * Emits code to load a reference to the given guest register into @a idxGprDst.
7804 */
7805DECL_INLINE_THROW(uint32_t)
7806iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7807 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7808{
7809#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7810 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
7811#endif
7812
7813 /*
7814 * Get the offset relative to the CPUMCTX structure.
7815 */
7816 uint32_t offCpumCtx;
7817 switch (enmClass)
7818 {
7819 case kIemNativeGstRegRef_Gpr:
7820 Assert(idxRegInClass < 16);
7821 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7822 break;
7823
7824 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
7825 Assert(idxRegInClass < 4);
7826 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7827 break;
7828
7829 case kIemNativeGstRegRef_EFlags:
7830 Assert(idxRegInClass == 0);
7831 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7832 break;
7833
7834 case kIemNativeGstRegRef_MxCsr:
7835 Assert(idxRegInClass == 0);
7836 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7837 break;
7838
7839 case kIemNativeGstRegRef_FpuReg:
7840 Assert(idxRegInClass < 8);
7841 AssertFailed(); /** @todo what kind of indexing? */
7842 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7843 break;
7844
7845 case kIemNativeGstRegRef_MReg:
7846 Assert(idxRegInClass < 8);
7847 AssertFailed(); /** @todo what kind of indexing? */
7848 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7849 break;
7850
7851 case kIemNativeGstRegRef_XReg:
7852 Assert(idxRegInClass < 16);
7853 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7854 break;
7855
7856 default:
7857 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7858 }
7859
7860 /*
7861 * Load the value into the destination register.
7862 */
7863#ifdef RT_ARCH_AMD64
7864 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7865
7866#elif defined(RT_ARCH_ARM64)
7867 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7868 Assert(offCpumCtx < 4096);
7869 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7870
7871#else
7872# error "Port me!"
7873#endif
7874
7875 return off;
7876}
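/*
 * Editorial sketch, not part of the original source: the address the emitted
 * LEA/ADD ends up producing for a GPR reference, spelled out as plain pointer
 * arithmetic (pVCpu standing in for the fixed VMCPU register).
 */
#if 0
static uintptr_t iemNativeGstRegRefAddrExample(PVMCPUCC pVCpu, uint8_t idxGpr)
{
    uint32_t const offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxGpr]);  /* class kIemNativeGstRegRef_Gpr */
    return (uintptr_t)pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx;
}
#endif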
7877
7878
7879/**
7880 * Common code for CIMPL and AIMPL calls.
7881 *
7882 * These are calls that use argument variables and such. They should not be
7883 * confused with internal calls required to implement an MC operation,
7884 * like a TLB load and similar.
7885 *
7886 * Upon return all that is left to do is to load any hidden arguments and
7887 * perform the call. All argument variables are freed.
7888 *
7889 * @returns New code buffer offset; throws VBox status code on error.
7890 * @param pReNative The native recompile state.
7891 * @param off The code buffer offset.
7892 * @param cArgs The total number of arguments (includes hidden
7893 * count).
7894 * @param cHiddenArgs The number of hidden arguments. The hidden
7895 * arguments must not have any variable declared for
7896 * them, whereas all the regular arguments must
7897 * (tstIEMCheckMc ensures this).
7898 */
7899DECL_HIDDEN_THROW(uint32_t)
7900iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7901{
7902#ifdef VBOX_STRICT
7903 /*
7904 * Assert sanity.
7905 */
7906 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7907 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7908 for (unsigned i = 0; i < cHiddenArgs; i++)
7909 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7910 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7911 {
7912 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7913 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7914 }
7915 iemNativeRegAssertSanity(pReNative);
7916#endif
7917
7918 /* We don't know what the called function makes use of, so flush any pending register writes. */
7919 off = iemNativeRegFlushPendingWrites(pReNative, off);
7920
7921 /*
7922 * Before we do anything else, go over variables that are referenced and
7923 * make sure they are not in a register.
7924 */
7925 uint32_t bmVars = pReNative->Core.bmVars;
7926 if (bmVars)
7927 {
7928 do
7929 {
7930 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7931 bmVars &= ~RT_BIT_32(idxVar);
7932
7933 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7934 {
7935 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7936 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7937 {
7938 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7939 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7940 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7941 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7942 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7943
7944 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7945 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7946 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7947 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7948 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7949 }
7950 }
7951 } while (bmVars != 0);
7952#if 0 //def VBOX_STRICT
7953 iemNativeRegAssertSanity(pReNative);
7954#endif
7955 }
7956
7957 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7958
7959 /*
7960 * First, go over the host registers that will be used for arguments and make
7961 * sure they either hold the desired argument or are free.
7962 */
7963 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7964 {
7965 for (uint32_t i = 0; i < cRegArgs; i++)
7966 {
7967 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7968 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7969 {
7970 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7971 {
7972 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7973 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7974 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7975 Assert(pVar->idxReg == idxArgReg);
7976 uint8_t const uArgNo = pVar->uArgNo;
7977 if (uArgNo == i)
7978 { /* perfect */ }
7979 /* The variable allocator logic should make sure this is impossible,
7980 except for when the return register is used as a parameter (ARM,
7981 but not x86). */
7982#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7983 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7984 {
7985# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7986# error "Implement this"
7987# endif
7988 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7989 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7990 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7991 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7992 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7993 }
7994#endif
7995 else
7996 {
7997 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7998
7999 if (pVar->enmKind == kIemNativeVarKind_Stack)
8000 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8001 else
8002 {
8003 /* just free it, can be reloaded if used again */
8004 pVar->idxReg = UINT8_MAX;
8005 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8006 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8007 }
8008 }
8009 }
8010 else
8011 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8012 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8013 }
8014 }
8015#if 0 //def VBOX_STRICT
8016 iemNativeRegAssertSanity(pReNative);
8017#endif
8018 }
8019
8020 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8021
8022#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8023 /*
8024 * If there are any stack arguments, make sure they are in their place as well.
8025 *
8026 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8027 * the caller) will be loading it later and it must be free (see first loop).
8028 */
8029 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8030 {
8031 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8032 {
8033 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8034 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8035 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8036 {
8037 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8038 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8039 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8040 pVar->idxReg = UINT8_MAX;
8041 }
8042 else
8043 {
8044 /* Use ARG0 as temp for stuff we need registers for. */
8045 switch (pVar->enmKind)
8046 {
8047 case kIemNativeVarKind_Stack:
8048 {
8049 uint8_t const idxStackSlot = pVar->idxStackSlot;
8050 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8051 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8052 iemNativeStackCalcBpDisp(idxStackSlot));
8053 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8054 continue;
8055 }
8056
8057 case kIemNativeVarKind_Immediate:
8058 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8059 continue;
8060
8061 case kIemNativeVarKind_VarRef:
8062 {
8063 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8064 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8065 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8066 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8067 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8068 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8069 {
8070 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8071 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8072 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8073 }
8074 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8075 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8076 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8077 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8078 continue;
8079 }
8080
8081 case kIemNativeVarKind_GstRegRef:
8082 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8083 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8084 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8085 continue;
8086
8087 case kIemNativeVarKind_Invalid:
8088 case kIemNativeVarKind_End:
8089 break;
8090 }
8091 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8092 }
8093 }
8094# if 0 //def VBOX_STRICT
8095 iemNativeRegAssertSanity(pReNative);
8096# endif
8097 }
8098#else
8099 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8100#endif
8101
8102 /*
8103 * Make sure the argument variables are loaded into their respective registers.
8104 *
8105 * We can optimize this by ASSUMING that any register allocations are for
8106 * registers that have already been loaded and are ready. The previous step
8107 * saw to that.
8108 */
8109 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8110 {
8111 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8112 {
8113 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8114 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8115 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8116 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8117 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8118 else
8119 {
8120 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8121 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8122 {
8123 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8124 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8125 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8126 | RT_BIT_32(idxArgReg);
8127 pVar->idxReg = idxArgReg;
8128 }
8129 else
8130 {
8131 /* Use ARG0 as temp for stuff we need registers for. */
8132 switch (pVar->enmKind)
8133 {
8134 case kIemNativeVarKind_Stack:
8135 {
8136 uint8_t const idxStackSlot = pVar->idxStackSlot;
8137 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8138 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8139 continue;
8140 }
8141
8142 case kIemNativeVarKind_Immediate:
8143 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8144 continue;
8145
8146 case kIemNativeVarKind_VarRef:
8147 {
8148 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8149 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8150 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8151 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8152 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8153 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8154 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8155 {
8156 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8157 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8158 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8159 }
8160 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8161 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8162 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8163 continue;
8164 }
8165
8166 case kIemNativeVarKind_GstRegRef:
8167 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8168 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8169 continue;
8170
8171 case kIemNativeVarKind_Invalid:
8172 case kIemNativeVarKind_End:
8173 break;
8174 }
8175 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8176 }
8177 }
8178 }
8179#if 0 //def VBOX_STRICT
8180 iemNativeRegAssertSanity(pReNative);
8181#endif
8182 }
8183#ifdef VBOX_STRICT
8184 else
8185 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8186 {
8187 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8188 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8189 }
8190#endif
8191
8192 /*
8193 * Free all argument variables (simplified).
8194 * Their lifetime always expires with the call they are for.
8195 */
8196 /** @todo Make the python script check that arguments aren't used after
8197 * IEM_MC_CALL_XXXX. */
8198 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8199 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8200 * an argument value. There is also some FPU stuff. */
8201 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8202 {
8203 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8204 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8205
8206 /* no need to free registers: */
8207 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8208 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8209 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8210 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8211 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8212 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8213
8214 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8215 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8216 iemNativeVarFreeStackSlots(pReNative, idxVar);
8217 }
8218 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8219
8220 /*
8221 * Flush volatile registers as we make the call.
8222 */
8223 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8224
8225 return off;
8226}
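/*
 * Editorial sketch, not part of the original source: the rough shape of a
 * caller of iemNativeEmitCallCommon for a call with one hidden argument (the
 * pVCpu pointer) and one regular constant argument.  The MC-level argument
 * number passed to iemNativeArgAllocConst is offset by the hidden count
 * internally (see iemNativeArgAllocInt above); the final call emission is left
 * as a comment since the exact helper depends on the call type.
 */
#if 0
static uint32_t iemNativeEmitCallCommonUsageExample(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uArgValue)
{
    uint8_t const idxArg = iemNativeArgAllocConst(pReNative, 0, sizeof(uint64_t), uArgValue); /* regular argument */
    RT_NOREF(idxArg);
    off = iemNativeEmitCallCommon(pReNative, off, 2 /*cArgs*/, 1 /*cHiddenArgs*/);
    /* Load the hidden pVCpu argument and emit the actual call: */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    /* off = <emit the call to the CIMPL/AIMPL worker here>; */
    return off;
}
#endif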
8227
8228
8229
8230/*********************************************************************************************************************************
8231* TLB Lookup. *
8232*********************************************************************************************************************************/
8233
8234/**
8235 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8236 */
8237DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8238{
8239 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8240 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8241 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8242 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8243
8244 /* Do the lookup manually. */
8245 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8246 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8247 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8248 if (RT_LIKELY(pTlbe->uTag == uTag))
8249 {
8250 /*
8251 * Check TLB page table level access flags.
8252 */
8253 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8254 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8255 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8256 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8257 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8258 | IEMTLBE_F_PG_UNASSIGNED
8259 | IEMTLBE_F_PT_NO_ACCESSED
8260 | fNoWriteNoDirty | fNoUser);
8261 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8262 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8263 {
8264 /*
8265 * Return the address.
8266 */
8267 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8268 if ((uintptr_t)pbAddr == uResult)
8269 return;
8270 RT_NOREF(cbMem);
8271 AssertFailed();
8272 }
8273 else
8274 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8275 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8276 }
8277 else
8278 AssertFailed();
8279 RT_BREAKPOINT();
8280}
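/*
 * Editorial sketch, not part of the original source: how the (CPL + 1) & 4
 * trick above turns the current privilege level into the IEMTLBE_F_PT_NO_USER
 * check bit.  Only ring 3 produces a non-zero value, so supervisor accesses
 * ignore the "no user access" page table bit.
 */
#if 0
static uint64_t iemNativeTlbNoUserExample(uint8_t uCpl)
{
    /* uCpl = 0..2 -> 0; uCpl = 3 -> 4 (== IEMTLBE_F_PT_NO_USER per the AssertCompile above). */
    return (uCpl + 1) & 4;
}
#endif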
8281
8282/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8283
8284
8285
8286/*********************************************************************************************************************************
8287* Recompiler Core. *
8288*********************************************************************************************************************************/
8289
8290/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8291static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8292{
8293 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8294 pDis->cbCachedInstr += cbMaxRead;
8295 RT_NOREF(cbMinRead);
8296 return VERR_NO_DATA;
8297}
8298
8299
8300DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8301{
8302 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8303 {
8304#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8305 ENTRY(fLocalForcedActions),
8306 ENTRY(iem.s.rcPassUp),
8307 ENTRY(iem.s.fExec),
8308 ENTRY(iem.s.pbInstrBuf),
8309 ENTRY(iem.s.uInstrBufPc),
8310 ENTRY(iem.s.GCPhysInstrBuf),
8311 ENTRY(iem.s.cbInstrBufTotal),
8312 ENTRY(iem.s.idxTbCurInstr),
8313#ifdef VBOX_WITH_STATISTICS
8314 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8315 ENTRY(iem.s.StatNativeTlbHitsForStore),
8316 ENTRY(iem.s.StatNativeTlbHitsForStack),
8317 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8318 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8319 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8320 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8321 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8322#endif
8323 ENTRY(iem.s.DataTlb.aEntries),
8324 ENTRY(iem.s.DataTlb.uTlbRevision),
8325 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8326 ENTRY(iem.s.DataTlb.cTlbHits),
8327 ENTRY(iem.s.CodeTlb.aEntries),
8328 ENTRY(iem.s.CodeTlb.uTlbRevision),
8329 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8330 ENTRY(iem.s.CodeTlb.cTlbHits),
8331 ENTRY(pVMR3),
8332 ENTRY(cpum.GstCtx.rax),
8333 ENTRY(cpum.GstCtx.ah),
8334 ENTRY(cpum.GstCtx.rcx),
8335 ENTRY(cpum.GstCtx.ch),
8336 ENTRY(cpum.GstCtx.rdx),
8337 ENTRY(cpum.GstCtx.dh),
8338 ENTRY(cpum.GstCtx.rbx),
8339 ENTRY(cpum.GstCtx.bh),
8340 ENTRY(cpum.GstCtx.rsp),
8341 ENTRY(cpum.GstCtx.rbp),
8342 ENTRY(cpum.GstCtx.rsi),
8343 ENTRY(cpum.GstCtx.rdi),
8344 ENTRY(cpum.GstCtx.r8),
8345 ENTRY(cpum.GstCtx.r9),
8346 ENTRY(cpum.GstCtx.r10),
8347 ENTRY(cpum.GstCtx.r11),
8348 ENTRY(cpum.GstCtx.r12),
8349 ENTRY(cpum.GstCtx.r13),
8350 ENTRY(cpum.GstCtx.r14),
8351 ENTRY(cpum.GstCtx.r15),
8352 ENTRY(cpum.GstCtx.es.Sel),
8353 ENTRY(cpum.GstCtx.es.u64Base),
8354 ENTRY(cpum.GstCtx.es.u32Limit),
8355 ENTRY(cpum.GstCtx.es.Attr),
8356 ENTRY(cpum.GstCtx.cs.Sel),
8357 ENTRY(cpum.GstCtx.cs.u64Base),
8358 ENTRY(cpum.GstCtx.cs.u32Limit),
8359 ENTRY(cpum.GstCtx.cs.Attr),
8360 ENTRY(cpum.GstCtx.ss.Sel),
8361 ENTRY(cpum.GstCtx.ss.u64Base),
8362 ENTRY(cpum.GstCtx.ss.u32Limit),
8363 ENTRY(cpum.GstCtx.ss.Attr),
8364 ENTRY(cpum.GstCtx.ds.Sel),
8365 ENTRY(cpum.GstCtx.ds.u64Base),
8366 ENTRY(cpum.GstCtx.ds.u32Limit),
8367 ENTRY(cpum.GstCtx.ds.Attr),
8368 ENTRY(cpum.GstCtx.fs.Sel),
8369 ENTRY(cpum.GstCtx.fs.u64Base),
8370 ENTRY(cpum.GstCtx.fs.u32Limit),
8371 ENTRY(cpum.GstCtx.fs.Attr),
8372 ENTRY(cpum.GstCtx.gs.Sel),
8373 ENTRY(cpum.GstCtx.gs.u64Base),
8374 ENTRY(cpum.GstCtx.gs.u32Limit),
8375 ENTRY(cpum.GstCtx.gs.Attr),
8376 ENTRY(cpum.GstCtx.rip),
8377 ENTRY(cpum.GstCtx.eflags),
8378 ENTRY(cpum.GstCtx.uRipInhibitInt),
8379#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8380 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8381 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8382 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8383 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8384 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8385 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8386 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8387 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8388 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8389 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8390 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8391 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8392 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8393 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8394 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8395 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8396 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8397 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8398 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8399 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8400 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8401 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8402 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8403 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8404 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8405 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8406 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8407 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8408 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8409 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8410 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8411 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8412#endif
8413#undef ENTRY
8414 };
8415#ifdef VBOX_STRICT
8416 static bool s_fOrderChecked = false;
8417 if (!s_fOrderChecked)
8418 {
8419 s_fOrderChecked = true;
8420 uint32_t offPrev = s_aMembers[0].off;
8421 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8422 {
8423 Assert(s_aMembers[i].off > offPrev);
8424 offPrev = s_aMembers[i].off;
8425 }
8426 }
8427#endif
8428
8429 /*
8430 * Binary lookup.
8431 */
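    /* The member table above is sorted by offset (verified in strict builds), so a classic
       half-interval search narrows [iStart, iEnd) until a match is found or the interval collapses. */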
8432 unsigned iStart = 0;
8433 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8434 for (;;)
8435 {
8436 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8437 uint32_t const offCur = s_aMembers[iCur].off;
8438 if (off < offCur)
8439 {
8440 if (iCur != iStart)
8441 iEnd = iCur;
8442 else
8443 break;
8444 }
8445 else if (off > offCur)
8446 {
8447 if (iCur + 1 < iEnd)
8448 iStart = iCur + 1;
8449 else
8450 break;
8451 }
8452 else
8453 return s_aMembers[iCur].pszName;
8454 }
8455#ifdef VBOX_WITH_STATISTICS
8456 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8457 return "iem.s.acThreadedFuncStats[iFn]";
8458#endif
8459 return NULL;
8460}
8461
8462
8463/**
8464 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
8465 * @returns pszBuf.
8466 * @param fFlags The flags.
8467 * @param pszBuf The output buffer.
8468 * @param cbBuf The output buffer size. At least 32 bytes.
8469 */
8470DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
8471{
8472 Assert(cbBuf >= 32);
8473 static RTSTRTUPLE const s_aModes[] =
8474 {
8475 /* [00] = */ { RT_STR_TUPLE("16BIT") },
8476 /* [01] = */ { RT_STR_TUPLE("32BIT") },
8477 /* [02] = */ { RT_STR_TUPLE("!2!") },
8478 /* [03] = */ { RT_STR_TUPLE("!3!") },
8479 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
8480 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
8481 /* [06] = */ { RT_STR_TUPLE("!6!") },
8482 /* [07] = */ { RT_STR_TUPLE("!7!") },
8483 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
8484 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
8485 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
8486 /* [0b] = */ { RT_STR_TUPLE("!b!") },
8487 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
8488 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
8489 /* [0e] = */ { RT_STR_TUPLE("!e!") },
8490 /* [0f] = */ { RT_STR_TUPLE("!f!") },
8491 /* [10] = */ { RT_STR_TUPLE("!10!") },
8492 /* [11] = */ { RT_STR_TUPLE("!11!") },
8493 /* [12] = */ { RT_STR_TUPLE("!12!") },
8494 /* [13] = */ { RT_STR_TUPLE("!13!") },
8495 /* [14] = */ { RT_STR_TUPLE("!14!") },
8496 /* [15] = */ { RT_STR_TUPLE("!15!") },
8497 /* [16] = */ { RT_STR_TUPLE("!16!") },
8498 /* [17] = */ { RT_STR_TUPLE("!17!") },
8499 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
8500 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
8501 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
8502 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
8503 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
8504 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
8505 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
8506 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
8507 };
8508 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
8509 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
8510 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
8511
8512 pszBuf[off++] = ' ';
8513 pszBuf[off++] = 'C';
8514 pszBuf[off++] = 'P';
8515 pszBuf[off++] = 'L';
8516 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
8517 Assert(off < 32);
8518
8519 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
8520
8521 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
8522 {
8523 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
8524 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
8525 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
8526 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
8527 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
8528 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
8529 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
8530 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
8531 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
8532 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
8533 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
8534 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
8535 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
8536 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
8537 };
8538 if (fFlags)
8539 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
8540 if (s_aFlags[i].fFlag & fFlags)
8541 {
8542 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
8543 pszBuf[off++] = ' ';
8544 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
8545 off += s_aFlags[i].cchName;
8546 fFlags &= ~s_aFlags[i].fFlag;
8547 if (!fFlags)
8548 break;
8549 }
8550 pszBuf[off] = '\0';
8551
8552 return pszBuf;
8553}
8554
8555
8556DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8557{
8558 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8559#if defined(RT_ARCH_AMD64)
8560 static const char * const a_apszMarkers[] =
8561 {
8562 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8563 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8564 };
8565#endif
8566
8567 char szDisBuf[512];
8568 DISSTATE Dis;
8569 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8570 uint32_t const cNative = pTb->Native.cInstructions;
8571 uint32_t offNative = 0;
8572#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8573 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8574#endif
8575 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8576 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8577 : DISCPUMODE_64BIT;
8578#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8579 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8580#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8581 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8582#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8583# error "Port me"
8584#else
8585 csh hDisasm = ~(size_t)0;
8586# if defined(RT_ARCH_AMD64)
8587 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8588# elif defined(RT_ARCH_ARM64)
8589 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8590# else
8591# error "Port me"
8592# endif
8593 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8594
8595 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8596 //Assert(rcCs == CS_ERR_OK);
8597#endif
8598
8599 /*
8600 * Print TB info.
8601 */
8602 pHlp->pfnPrintf(pHlp,
8603 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8604 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8605 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8606 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8607#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8608 if (pDbgInfo && pDbgInfo->cEntries > 1)
8609 {
8610 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8611
8612 /*
8613 * This disassembly is driven by the debug info which follows the native
8614 * code and indicates where the next guest instruction starts, where the
8615 * labels are, and similar things.
8616 */
8617 uint32_t idxThreadedCall = 0;
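        /* Only the low 24 bits of the TB flags (the IEM_F_XXX execution-mode portion) are
           tracked here, so fExec changes recorded in the debug info can be reported below. */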
8618 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8619 uint8_t idxRange = UINT8_MAX;
8620 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8621 uint32_t offRange = 0;
8622 uint32_t offOpcodes = 0;
8623 uint32_t const cbOpcodes = pTb->cbOpcodes;
8624 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8625 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8626 uint32_t iDbgEntry = 1;
8627 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8628
8629 while (offNative < cNative)
8630 {
8631 /* If we're at or have passed the point where the next chunk of debug
8632 info starts, process it. */
8633 if (offDbgNativeNext <= offNative)
8634 {
8635 offDbgNativeNext = UINT32_MAX;
8636 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8637 {
8638 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8639 {
8640 case kIemTbDbgEntryType_GuestInstruction:
8641 {
8642 /* Did the exec flag change? */
8643 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8644 {
8645 pHlp->pfnPrintf(pHlp,
8646 " fExec change %#08x -> %#08x %s\n",
8647 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8648 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8649 szDisBuf, sizeof(szDisBuf)));
8650 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8651 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8652 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8653 : DISCPUMODE_64BIT;
8654 }
8655
8656 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8657 where the compilation was aborted before the opcode was recorded and the actual
8658 instruction was translated to a threaded call. This may happen when we run out
8659 of ranges, or when some complicated interrupts/FFs are found to be pending or
8660 similar. So, we just deal with it here rather than in the compiler code as it
8661 is a lot simpler to do here. */
8662 if ( idxRange == UINT8_MAX
8663 || idxRange >= cRanges
8664 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8665 {
8666 idxRange += 1;
8667 if (idxRange < cRanges)
8668 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8669 else
8670 continue;
8671 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8672 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8673 + (pTb->aRanges[idxRange].idxPhysPage == 0
8674 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8675 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8676 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8677 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8678 pTb->aRanges[idxRange].idxPhysPage);
8679 GCPhysPc += offRange;
8680 }
8681
8682 /* Disassemble the instruction. */
8683 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8684 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8685 uint32_t cbInstr = 1;
8686 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8687 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8688 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8689 if (RT_SUCCESS(rc))
8690 {
8691 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8692 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8693 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8694 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8695
8696 static unsigned const s_offMarker = 55;
8697 static char const s_szMarker[] = " ; <--- guest";
8698 if (cch < s_offMarker)
8699 {
8700 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8701 cch = s_offMarker;
8702 }
8703 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8704 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8705
8706 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8707 }
8708 else
8709 {
8710 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8711 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8712 cbInstr = 1;
8713 }
8714 GCPhysPc += cbInstr;
8715 offOpcodes += cbInstr;
8716 offRange += cbInstr;
8717 continue;
8718 }
8719
8720 case kIemTbDbgEntryType_ThreadedCall:
8721 pHlp->pfnPrintf(pHlp,
8722 " Call #%u to %s (%u args) - %s\n",
8723 idxThreadedCall,
8724 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8725 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8726 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8727 idxThreadedCall++;
8728 continue;
8729
8730 case kIemTbDbgEntryType_GuestRegShadowing:
8731 {
8732 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8733 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8734 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8735 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8736 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8737 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8738 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8739 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8740 else
8741 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8742 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8743 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8744 continue;
8745 }
8746
8747#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8748 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8749 {
8750 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8751 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8752 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8753 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8754 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8755 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8756 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8757 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8758 else
8759 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8760 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8761 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8762 continue;
8763 }
8764#endif
8765
8766 case kIemTbDbgEntryType_Label:
8767 {
8768 const char *pszName = "what_the_fudge";
8769 const char *pszComment = "";
8770 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8771 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8772 {
8773 case kIemNativeLabelType_Return:
8774 pszName = "Return";
8775 break;
8776 case kIemNativeLabelType_ReturnBreak:
8777 pszName = "ReturnBreak";
8778 break;
8779 case kIemNativeLabelType_ReturnWithFlags:
8780 pszName = "ReturnWithFlags";
8781 break;
8782 case kIemNativeLabelType_NonZeroRetOrPassUp:
8783 pszName = "NonZeroRetOrPassUp";
8784 break;
8785 case kIemNativeLabelType_RaiseGp0:
8786 pszName = "RaiseGp0";
8787 break;
8788 case kIemNativeLabelType_RaiseNm:
8789 pszName = "RaiseNm";
8790 break;
8791 case kIemNativeLabelType_RaiseUd:
8792 pszName = "RaiseUd";
8793 break;
8794 case kIemNativeLabelType_RaiseMf:
8795 pszName = "RaiseMf";
8796 break;
8797 case kIemNativeLabelType_RaiseXf:
8798 pszName = "RaiseXf";
8799 break;
8800 case kIemNativeLabelType_ObsoleteTb:
8801 pszName = "ObsoleteTb";
8802 break;
8803 case kIemNativeLabelType_NeedCsLimChecking:
8804 pszName = "NeedCsLimChecking";
8805 break;
8806 case kIemNativeLabelType_CheckBranchMiss:
8807 pszName = "CheckBranchMiss";
8808 break;
8809 case kIemNativeLabelType_If:
8810 pszName = "If";
8811 fNumbered = true;
8812 break;
8813 case kIemNativeLabelType_Else:
8814 pszName = "Else";
8815 fNumbered = true;
8816 pszComment = " ; regs state restored pre-if-block";
8817 break;
8818 case kIemNativeLabelType_Endif:
8819 pszName = "Endif";
8820 fNumbered = true;
8821 break;
8822 case kIemNativeLabelType_CheckIrq:
8823 pszName = "CheckIrq_CheckVM";
8824 fNumbered = true;
8825 break;
8826 case kIemNativeLabelType_TlbLookup:
8827 pszName = "TlbLookup";
8828 fNumbered = true;
8829 break;
8830 case kIemNativeLabelType_TlbMiss:
8831 pszName = "TlbMiss";
8832 fNumbered = true;
8833 break;
8834 case kIemNativeLabelType_TlbDone:
8835 pszName = "TlbDone";
8836 fNumbered = true;
8837 break;
8838 case kIemNativeLabelType_Invalid:
8839 case kIemNativeLabelType_End:
8840 break;
8841 }
8842 if (fNumbered)
8843 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8844 else
8845 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8846 continue;
8847 }
8848
8849 case kIemTbDbgEntryType_NativeOffset:
8850 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8851 Assert(offDbgNativeNext > offNative);
8852 break;
8853
8854#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8855 case kIemTbDbgEntryType_DelayedPcUpdate:
8856 pHlp->pfnPrintf(pHlp,
8857 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8858 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8859 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8860 continue;
8861#endif
8862
8863 default:
8864 AssertFailed();
8865 }
8866 iDbgEntry++;
8867 break;
8868 }
8869 }
8870
8871 /*
8872 * Disassemble the next native instruction.
8873 */
8874 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8875# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8876 uint32_t cbInstr = sizeof(paNative[0]);
8877 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8878 if (RT_SUCCESS(rc))
8879 {
8880# if defined(RT_ARCH_AMD64)
8881 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8882 {
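                    /* The marker is a 7-byte NOP; the info dword sits in its last four bytes
                       (read below as Dis.Instr.ab[3..6]). */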
8883 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8884 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8885 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8886 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8887 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8888 uInfo & 0x8000 ? "recompiled" : "todo");
8889 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8890 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8891 else
8892 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8893 }
8894 else
8895# endif
8896 {
8897 const char *pszAnnotation = NULL;
8898# ifdef RT_ARCH_AMD64
8899 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8900 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8901 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8902 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8903 PCDISOPPARAM pMemOp;
8904 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8905 pMemOp = &Dis.Param1;
8906 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8907 pMemOp = &Dis.Param2;
8908 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8909 pMemOp = &Dis.Param3;
8910 else
8911 pMemOp = NULL;
8912 if ( pMemOp
8913 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8914 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8915 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8916 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8917
8918#elif defined(RT_ARCH_ARM64)
8919 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8920 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8921 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8922# else
8923# error "Port me"
8924# endif
8925 if (pszAnnotation)
8926 {
8927 static unsigned const s_offAnnotation = 55;
8928 size_t const cchAnnotation = strlen(pszAnnotation);
8929 size_t cchDis = strlen(szDisBuf);
8930 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8931 {
8932 if (cchDis < s_offAnnotation)
8933 {
8934 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8935 cchDis = s_offAnnotation;
8936 }
8937 szDisBuf[cchDis++] = ' ';
8938 szDisBuf[cchDis++] = ';';
8939 szDisBuf[cchDis++] = ' ';
8940 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8941 }
8942 }
8943 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8944 }
8945 }
8946 else
8947 {
8948# if defined(RT_ARCH_AMD64)
8949 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8950 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8951# elif defined(RT_ARCH_ARM64)
8952 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8953# else
8954# error "Port me"
8955# endif
8956 cbInstr = sizeof(paNative[0]);
8957 }
8958 offNative += cbInstr / sizeof(paNative[0]);
8959
8960# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8961 cs_insn *pInstr;
8962 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8963 (uintptr_t)pNativeCur, 1, &pInstr);
8964 if (cInstrs > 0)
8965 {
8966 Assert(cInstrs == 1);
8967 const char *pszAnnotation = NULL;
8968# if defined(RT_ARCH_ARM64)
8969 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8970 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8971 {
8972 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8973 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8974 char *psz = strchr(pInstr->op_str, '[');
8975 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8976 {
8977 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8978 int32_t off = -1;
8979 psz += 4;
8980 if (*psz == ']')
8981 off = 0;
8982 else if (*psz == ',')
8983 {
8984 psz = RTStrStripL(psz + 1);
8985 if (*psz == '#')
8986 off = RTStrToInt32(&psz[1]);
8987 /** @todo deal with index registers and LSL as well... */
8988 }
8989 if (off >= 0)
8990 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8991 }
8992 }
8993# endif
8994
8995 size_t const cchOp = strlen(pInstr->op_str);
8996# if defined(RT_ARCH_AMD64)
8997 if (pszAnnotation)
8998 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8999 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9000 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9001 else
9002 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9003 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9004
9005# else
9006 if (pszAnnotation)
9007 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9008 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9009 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9010 else
9011 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9012 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9013# endif
9014 offNative += pInstr->size / sizeof(*pNativeCur);
9015 cs_free(pInstr, cInstrs);
9016 }
9017 else
9018 {
9019# if defined(RT_ARCH_AMD64)
9020 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9021 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9022# else
9023 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9024# endif
9025 offNative++;
9026 }
9027# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9028 }
9029 }
9030 else
9031#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9032 {
9033 /*
9034 * No debug info, just disassemble the x86 code and then the native code.
9035 *
9036 * First the guest code:
9037 */
9038 for (unsigned i = 0; i < pTb->cRanges; i++)
9039 {
9040 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9041 + (pTb->aRanges[i].idxPhysPage == 0
9042 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9043 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9044 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9045 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9046 unsigned off = pTb->aRanges[i].offOpcodes;
9047 /** @todo this ain't working when crossing pages! */
9048 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9049 while (off < cbOpcodes)
9050 {
9051 uint32_t cbInstr = 1;
9052 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9053 &pTb->pabOpcodes[off], cbOpcodes - off,
9054 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9055 if (RT_SUCCESS(rc))
9056 {
9057 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9058 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9059 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9060 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9061 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9062 GCPhysPc += cbInstr;
9063 off += cbInstr;
9064 }
9065 else
9066 {
9067 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9068 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9069 break;
9070 }
9071 }
9072 }
9073
9074 /*
9075 * Then the native code:
9076 */
9077 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9078 while (offNative < cNative)
9079 {
9080 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9081# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9082 uint32_t cbInstr = sizeof(paNative[0]);
9083 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9084 if (RT_SUCCESS(rc))
9085 {
9086# if defined(RT_ARCH_AMD64)
9087 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9088 {
9089 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9090 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9091 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9092 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9093 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9094 uInfo & 0x8000 ? "recompiled" : "todo");
9095 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9096 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9097 else
9098 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9099 }
9100 else
9101# endif
9102 {
9103# ifdef RT_ARCH_AMD64
9104 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9105 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9106 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9107 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9108# elif defined(RT_ARCH_ARM64)
9109 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9110 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9111 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9112# else
9113# error "Port me"
9114# endif
9115 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9116 }
9117 }
9118 else
9119 {
9120# if defined(RT_ARCH_AMD64)
9121 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9122 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9123# else
9124 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9125# endif
9126 cbInstr = sizeof(paNative[0]);
9127 }
9128 offNative += cbInstr / sizeof(paNative[0]);
9129
9130# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9131 cs_insn *pInstr;
9132 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9133 (uintptr_t)pNativeCur, 1, &pInstr);
9134 if (cInstrs > 0)
9135 {
9136 Assert(cInstrs == 1);
9137# if defined(RT_ARCH_AMD64)
9138 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9139 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9140# else
9141 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9142 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9143# endif
9144 offNative += pInstr->size / sizeof(*pNativeCur);
9145 cs_free(pInstr, cInstrs);
9146 }
9147 else
9148 {
9149# if defined(RT_ARCH_AMD64)
9150 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9151 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9152# else
9153 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9154# endif
9155 offNative++;
9156 }
9157# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9158 }
9159 }
9160
9161#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9162 /* Cleanup. */
9163 cs_close(&hDisasm);
9164#endif
9165}
9166
9167
9168/**
9169 * Recompiles the given threaded TB into a native one.
9170 *
9171 * In case of failure the translation block will be returned as-is.
9172 *
9173 * @returns pTb.
9174 * @param pVCpu The cross context virtual CPU structure of the calling
9175 * thread.
9176 * @param pTb The threaded translation block to recompile to native.
9177 */
9178DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9179{
9180 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9181
9182 /*
9183 * The first time thru, we allocate the recompiler state; the other times
9184 * we just need to reset it before using it again.
9185 */
9186 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9187 if (RT_LIKELY(pReNative))
9188 iemNativeReInit(pReNative, pTb);
9189 else
9190 {
9191 pReNative = iemNativeInit(pVCpu, pTb);
9192 AssertReturn(pReNative, pTb);
9193 }
9194
9195#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9196 /*
9197 * First do liveness analysis. This is done backwards.
9198 */
9199 {
9200 uint32_t idxCall = pTb->Thrd.cCalls;
9201 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9202 { /* likely */ }
9203 else
9204 {
9205 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9206 while (idxCall > cAlloc)
9207 cAlloc *= 2;
9208 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9209 AssertReturn(pvNew, pTb);
9210 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9211 pReNative->cLivenessEntriesAlloc = cAlloc;
9212 }
9213 AssertReturn(idxCall > 0, pTb);
9214 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9215
9216 /* The initial (final) entry. */
9217 idxCall--;
9218 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9219
9220 /* Loop backwards thru the calls and fill in the other entries. */
9221 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9222 while (idxCall > 0)
9223 {
9224 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9225 if (pfnLiveness)
9226 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9227 else
9228 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9229 pCallEntry--;
9230 idxCall--;
9231 }
9232
9233# ifdef VBOX_WITH_STATISTICS
9234 /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9235    to 'clobbered' rather than 'input'. */
9236 /** @todo */
9237# endif
9238 }
9239#endif
9240
9241 /*
9242 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9243 * for aborting if an error happens.
9244 */
9245 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9246#ifdef LOG_ENABLED
9247 uint32_t const cCallsOrg = cCallsLeft;
9248#endif
9249 uint32_t off = 0;
9250 int rc = VINF_SUCCESS;
9251 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9252 {
9253 /*
9254 * Emit prolog code (fixed).
9255 */
9256 off = iemNativeEmitProlog(pReNative, off);
9257
9258 /*
9259 * Convert the calls to native code.
9260 */
9261#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9262 int32_t iGstInstr = -1;
9263#endif
9264#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9265 uint32_t cThreadedCalls = 0;
9266 uint32_t cRecompiledCalls = 0;
9267#endif
9268#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9269 uint32_t idxCurCall = 0;
9270#endif
9271 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9272 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9273 while (cCallsLeft-- > 0)
9274 {
9275 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9276#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9277 pReNative->idxCurCall = idxCurCall;
9278#endif
9279
9280 /*
9281 * Debug info, assembly markup and statistics.
9282 */
9283#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9284 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9285 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9286#endif
9287#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9288 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9289 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9290 {
9291 if (iGstInstr < (int32_t)pTb->cInstructions)
9292 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9293 else
9294 Assert(iGstInstr == pTb->cInstructions);
9295 iGstInstr = pCallEntry->idxInstr;
9296 }
9297 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9298#endif
9299#if defined(VBOX_STRICT)
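            /* Emit a debug marker: low word = call index (bit 15 set when recompiled), high
               word = threaded function number; iemNativeDisassembleTb decodes this above. */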
9300 off = iemNativeEmitMarker(pReNative, off,
9301 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9302#endif
9303#if defined(VBOX_STRICT)
9304 iemNativeRegAssertSanity(pReNative);
9305#endif
9306#ifdef VBOX_WITH_STATISTICS
9307 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9308#endif
9309
9310 /*
9311 * Actual work.
9312 */
9313 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9314 pfnRecom ? "(recompiled)" : "(todo)"));
9315 if (pfnRecom) /** @todo stats on this. */
9316 {
9317 off = pfnRecom(pReNative, off, pCallEntry);
9318 STAM_REL_STATS({cRecompiledCalls++;});
9319 }
9320 else
9321 {
9322 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9323 STAM_REL_STATS({cThreadedCalls++;});
9324 }
9325 Assert(off <= pReNative->cInstrBufAlloc);
9326 Assert(pReNative->cCondDepth == 0);
9327
9328#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9329 if (LogIs2Enabled())
9330 {
9331 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9332# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9333 static const char s_achState[] = "CUXI";
9334# else
9335 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9336# endif
9337
9338 char szGpr[17];
9339 for (unsigned i = 0; i < 16; i++)
9340 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9341 szGpr[16] = '\0';
9342
9343 char szSegBase[X86_SREG_COUNT + 1];
9344 char szSegLimit[X86_SREG_COUNT + 1];
9345 char szSegAttrib[X86_SREG_COUNT + 1];
9346 char szSegSel[X86_SREG_COUNT + 1];
9347 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9348 {
9349 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9350 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9351 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9352 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9353 }
9354 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9355 = szSegSel[X86_SREG_COUNT] = '\0';
9356
9357 char szEFlags[8];
9358 for (unsigned i = 0; i < 7; i++)
9359 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9360 szEFlags[7] = '\0';
9361
9362 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9363 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9364 }
9365#endif
9366
9367 /*
9368 * Advance.
9369 */
9370 pCallEntry++;
9371#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9372 idxCurCall++;
9373#endif
9374 }
9375
9376 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9377 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9378 if (!cThreadedCalls)
9379 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9380
9381 /*
9382 * Emit the epilog code.
9383 */
9384 uint32_t idxReturnLabel;
9385 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9386
9387 /*
9388 * Generate special jump labels.
9389 */
9390 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9391 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9392 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9393 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9394 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
9395 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
9396 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
9397 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
9398 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
9399 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
9400 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
9401 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
9402 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
9403 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
9404 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
9405 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
9406 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
9407 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
9408 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
9409 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
9410 }
9411 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9412 {
9413 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9414 return pTb;
9415 }
9416 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9417 Assert(off <= pReNative->cInstrBufAlloc);
9418
9419 /*
9420 * Make sure all labels have been defined.
9421 */
9422 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9423#ifdef VBOX_STRICT
9424 uint32_t const cLabels = pReNative->cLabels;
9425 for (uint32_t i = 0; i < cLabels; i++)
9426 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9427#endif
9428
9429 /*
9430 * Allocate executable memory, copy over the code we've generated.
9431 */
9432 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9433 if (pTbAllocator->pDelayedFreeHead)
9434 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9435
9436 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9437 AssertReturn(paFinalInstrBuf, pTb);
9438 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9439
9440 /*
9441 * Apply fixups.
9442 */
9443 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9444 uint32_t const cFixups = pReNative->cFixups;
9445 for (uint32_t i = 0; i < cFixups; i++)
9446 {
9447 Assert(paFixups[i].off < off);
9448 Assert(paFixups[i].idxLabel < cLabels);
9449 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9450 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9451 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9452 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9453 switch (paFixups[i].enmType)
9454 {
9455#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9456 case kIemNativeFixupType_Rel32:
9457 Assert(paFixups[i].off + 4 <= off);
9458 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9459 continue;
9460
9461#elif defined(RT_ARCH_ARM64)
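            /* ARM64 branch fixups: the displacements below are signed and counted in 32-bit
               instruction units (imm26 is typically B/BL, imm19 B.cond/CBZ/CBNZ, imm14 TBZ/TBNZ). */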
9462 case kIemNativeFixupType_RelImm26At0:
9463 {
9464 Assert(paFixups[i].off < off);
9465 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9466 Assert(offDisp >= -262144 && offDisp < 262144);
9467 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9468 continue;
9469 }
9470
9471 case kIemNativeFixupType_RelImm19At5:
9472 {
9473 Assert(paFixups[i].off < off);
9474 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9475 Assert(offDisp >= -262144 && offDisp < 262144);
9476 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9477 continue;
9478 }
9479
9480 case kIemNativeFixupType_RelImm14At5:
9481 {
9482 Assert(paFixups[i].off < off);
9483 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9484 Assert(offDisp >= -8192 && offDisp < 8192);
9485 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9486 continue;
9487 }
9488
9489#endif
9490 case kIemNativeFixupType_Invalid:
9491 case kIemNativeFixupType_End:
9492 break;
9493 }
9494 AssertFailed();
9495 }
9496
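    /* Mark the copied code ready for execution (host icache maintenance etc. as needed). */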
9497 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9498 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9499
9500 /*
9501 * Convert the translation block.
9502 */
9503 RTMemFree(pTb->Thrd.paCalls);
9504 pTb->Native.paInstructions = paFinalInstrBuf;
9505 pTb->Native.cInstructions = off;
9506 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9507#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9508 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9509 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9510#endif
9511
9512 Assert(pTbAllocator->cThreadedTbs > 0);
9513 pTbAllocator->cThreadedTbs -= 1;
9514 pTbAllocator->cNativeTbs += 1;
9515 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9516
9517#ifdef LOG_ENABLED
9518 /*
9519 * Disassemble to the log if enabled.
9520 */
9521 if (LogIs3Enabled())
9522 {
9523 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9524 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9525# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9526 RTLogFlush(NULL);
9527# endif
9528 }
9529#endif
9530 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9531
9532 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9533 return pTb;
9534}
9535