VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103807

Last change on this file since 103807 was 103807, checked in by vboxsync, 9 months ago

VMM/IEM: Split up the native recompiler functions (IEMNativeFunctions.cpp.h) into 4 files to speed up compilation and reduce compiler memory consumption. This involved splitting out half the content of IEMAllThrdRecompiler.cpp into IEMAllN8veRecompFuncs.h and IEMN8veRecompiler.h. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 403.9 KB
Line 
1/* $Id: IEMAllN8veRecompiler.cpp 103807 2024-03-12 19:43:31Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using the RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation restricts page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
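/* Example (illustration only): a request for 300 bytes rounds up to
 * (300 + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 3
 * units, i.e. 384 bytes, matching the cReqUnits calculation in
 * iemExecMemAllocatorAllocInChunk below. */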
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity, they are allocated as one continuous
339 * block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside within 32-bit RVA distance of the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
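/* Sizing example (illustration only): with the default 64 MB chunk used for
 * large cbMax values and 128 byte units, cUnitsPerChunk is 524288 and
 * cBitmapElementsPerChunk is 8192, i.e. 64 KB of allocation bitmap per chunk. */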
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits.
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
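/* Scan example (illustration only): with cReqUnits = 3, if ASMBitFirstClear
 * returns bit 10 and bits 11 and 12 are also clear, bits 10 thru 12 are set in
 * the bitmap and the returned address is
 * pvChunk + ((idxFirst + 10) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */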
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * Whereas for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
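/*
 * Illustrative usage sketch (hypothetical, not built): the typical lifecycle of
 * an executable allocation using the three functions above. The helper name,
 * pvSrc and cbCode are made up for the example; the real caller is the native
 * recompiler, which emits code in place rather than memcpy'ing it.
 */
#if 0
static void iemExecMemUsageSketch(PVMCPUCC pVCpu, void const *pvSrc, uint32_t cbCode)
{
    /* Grab memory from the per-EMT allocator; on darwin it comes back RW, not RX. */
    void *pvDst = iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pvDst)
    {
        /* Write the native code while the pages are writable. */
        memcpy(pvDst, pvSrc, cbCode);
        /* Switch the pages (back) to read+exec and invalidate the icache where needed. */
        iemExecMemAllocatorReadyForUse(pVCpu, pvDst, cbCode);
        /* ... execute ... and eventually release it again: */
        iemExecMemAllocatorFree(pVCpu, pvDst, cbCode);
    }
}
#endif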
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here partly because we have one,
695 * but mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
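/* Example (illustration only): the data alignment factor -8 used in the CIE
 * below encodes as the single byte 0x78 (0xf8 & 0x3f = 0x38, OR'ed with 0x40). */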
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
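/* Example (illustration only): 300 (0x12c) encodes as the two bytes 0xac 0x02,
 * i.e. low 7 bits first with the continuation bit set on all but the last byte. */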
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
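/* Example (illustration only): iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) in
 * the CIE below emits DW_CFA_offset | DWREG_AMD64_RBP followed by ULEB128(2);
 * with the data alignment factor of -8 that places the saved RBP at
 * CFA + 2 * -8 = CFA - 16. */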
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be 64 byte aligned, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
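/*
 * Illustration (a sketch, not part of the build): the chunk size defaulting
 * performed above for cbChunk == 0 / UINT32_MAX, redone with plain integer
 * arithmetic instead of the RT_IS_POWER_OF_TWO / ASMBitLastSetU32 helpers.
 * The function name is made up for the example.
 *
 * @code
 *  #include <stdint.h>
 *
 *  static uint32_t exampleDefaultChunkSize(uint64_t cbMax)
 *  {
 *      if (cbMax >= UINT64_C(0x10000000))      // >= 256 MiB: fixed 64 MiB chunks.
 *          return UINT32_C(0x04000000);
 *      uint32_t cbChunk = cbMax < UINT64_C(0x01000000)                            // < 16 MiB?
 *                       ? (cbMax >= UINT64_C(0x00400000) ? UINT32_C(0x00400000)   //   min(cbMax, 4 MiB)
 *                                                        : (uint32_t)cbMax)
 *                       : (uint32_t)cbMax / 4;                                    // else cbMax / 4,
 *      if (cbChunk & (cbChunk - 1))            // rounded up to the next power of two.
 *      {
 *          while (cbChunk & (cbChunk - 1))
 *              cbChunk &= cbChunk - 1;         // keep only the most significant bit,
 *          cbChunk <<= 1;                      // then go one power of two up.
 *      }
 *      return cbChunk;   // e.g. cbMax = 40 MiB: 40/4 = 10 MiB, rounded up to 16 MiB chunks.
 *  }
 * @endcode
 */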
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#GP(0).
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#NM.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseDeviceNotAvailableJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise a \#UD.
1607 */
1608IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1609{
1610 iemRaiseUndefinedOpcodeJmp(pVCpu);
1611#ifndef _MSC_VER
1612 return VINF_IEM_RAISED_XCPT; /* not reached */
1613#endif
1614}
1615
1616
1617/**
1618 * Used by TB code when it wants to raise a \#MF.
1619 */
1620IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1621{
1622 iemRaiseMathFaultJmp(pVCpu);
1623#ifndef _MSC_VER
1624 return VINF_IEM_RAISED_XCPT; /* not reached */
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code when it wants to raise a \#XF.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1633{
1634 iemRaiseSimdFpExceptionJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when detecting opcode changes.
1643 * @see iemThreadedFuncWorkerObsoleteTb
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1646{
1647 /* We set fSafeToFree to false because we're being called in the context
1648 of a TB callback function, which for native TBs means we cannot release
1649 the executable memory until we've returned our way back to iemTbExec, as
1650 that return path goes via the native code generated for the TB. */
1651 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1652 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1653 return VINF_IEM_REEXEC_BREAK;
1654}
1655
1656
1657/**
1658 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1661{
1662 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1663 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1664 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1665 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1666 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1667 return VINF_IEM_REEXEC_BREAK;
1668}
1669
1670
1671/**
1672 * Used by TB code when we missed a PC check after a branch.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1675{
1676 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1677 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1678 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1679 pVCpu->iem.s.pbInstrBuf));
1680 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1681 return VINF_IEM_REEXEC_BREAK;
1682}
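/*
 * Reading the log statement above: pbInstrBuf/GCPhysInstrBuf describe the
 * currently mapped opcode buffer and uInstrBufPc the flat PC its first byte
 * corresponds to, so GCPhysInstrBuf + (CS.BASE + RIP) - uInstrBufPc is the
 * guest physical address of the current instruction within that mapping.
 * (This interpretation is inferred from the expression itself; it is not
 * spelled out elsewhere in this file.)
 */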
1683
1684
1685
1686/*********************************************************************************************************************************
1687* Helpers: Segmented memory fetches and stores. *
1688*********************************************************************************************************************************/
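/*
 * A note on the pattern used by all the helpers below: each comes in an
 * IEMNATIVE_WITH_TLB_LOOKUP_* flavour calling the iemMem*SafeJmp worker and
 * an #else flavour calling the regular iemMem*Jmp worker.  The naming
 * suggests (this is an inference, not stated here) that with inline TLB
 * lookup enabled the recompiled code handles the TLB hit itself and only
 * calls these helpers on the slow path, hence the 'Safe' workers; without
 * it the helper performs the whole access via the ordinary worker.
 */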
1689
1690/**
1691 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1694{
1695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1696 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1697#else
1698 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1699#endif
1700}
1701
1702
1703/**
1704 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1705 * to 16 bits.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1708{
1709#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1710 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1711#else
1712 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1713#endif
1714}
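/*
 * The cast chain above implements "sign-extend to the target width, then
 * zero-extend to 64 bits".  A tiny standalone illustration (the values are
 * examples picked for this comment, not taken from the code):
 *
 * @code
 *  #include <assert.h>
 *  #include <stdint.h>
 *
 *  int main(void)
 *  {
 *      uint8_t const  bFetched = 0x80; // -128 when viewed as signed 8-bit.
 *      uint64_t const uResult  = (uint64_t)(uint16_t)(int16_t)(int8_t)bFetched;
 *      assert(uResult == UINT64_C(0x000000000000ff80)); // sign bits stop at bit 15.
 *      return 0;
 *  }
 * @endcode
 */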
1715
1716
1717/**
1718 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1719 * to 32 bits.
1720 */
1721IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1722{
1723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1724 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1725#else
1726 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1727#endif
1728}
1729
1730/**
1731 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1732 * to 64 bits.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1735{
1736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1737 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1738#else
1739 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1740#endif
1741}
1742
1743
1744/**
1745 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1746 */
1747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1748{
1749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1750 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1751#else
1752 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1753#endif
1754}
1755
1756
1757/**
1758 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1759 * to 32 bits.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1764 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1765#else
1766 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1773 * to 64 bits.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1778 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1779#else
1780 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1791 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1792#else
1793 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1800 * to 64 bits.
1801 */
1802IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1803{
1804#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1805 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1806#else
1807 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1808#endif
1809}
1810
1811
1812/**
1813 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1814 */
1815IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1816{
1817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1818 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1819#else
1820 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1821#endif
1822}
1823
1824
1825/**
1826 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1831 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1832#else
1833 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1844 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1845#else
1846 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1857 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1858#else
1859 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1870 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1871#else
1872 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1873#endif
1874}
1875
1876
1877
1878/**
1879 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1884 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1885#else
1886 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1897 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1898#else
1899 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to store a 32-bit selector value onto a generic stack.
1906 *
1907 * Intel CPUs don't write a whole dword, hence the special function.
1908 */
1909IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1910{
1911#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1912 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1913#else
1914 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1915#endif
1916}
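/*
 * Background for the U32SReg special case (summarizing documented Intel
 * behaviour, not something stated elsewhere in this file): when a segment
 * register is pushed with a 32-bit operand size, recent Intel CPUs perform
 * a 16-bit store and leave the upper half of the stack dword untouched.
 * Conceptually (hypothetical helper, for illustration only):
 *
 * @code
 *  static void examplePushSRegIntoU32Slot(uint8_t *pbStackSlot, uint16_t uSel)
 *  {
 *      pbStackSlot[0] = (uint8_t)uSel;         // only the low word is written,
 *      pbStackSlot[1] = (uint8_t)(uSel >> 8);  // bytes 2 and 3 keep their old value.
 *  }
 * @endcode
 */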
1917
1918
1919/**
1920 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1921 */
1922IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1923{
1924#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1925 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1926#else
1927 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1928#endif
1929}
1930
1931
1932/**
1933 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1934 */
1935IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1936{
1937#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1938 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1939#else
1940 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1941#endif
1942}
1943
1944
1945/**
1946 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1947 */
1948IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1949{
1950#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1951 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1952#else
1953 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1954#endif
1955}
1956
1957
1958/**
1959 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1960 */
1961IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1962{
1963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1964 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1965#else
1966 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1967#endif
1968}
1969
1970
1971
1972/*********************************************************************************************************************************
1973* Helpers: Flat memory fetches and stores. *
1974*********************************************************************************************************************************/
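/*
 * Note on the flat variants below: in the TLB-lookup configuration they
 * reuse the segmented iemMem*SafeJmp workers and pass UINT8_MAX as the
 * segment register index, which is the IEM convention for "no segment /
 * flat address"; otherwise they call the dedicated iemMemFlat* workers.
 */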
1975
1976/**
1977 * Used by TB code to load unsigned 8-bit data w/ flat address.
1978 * @note Zero extending the value to 64-bit to simplify assembly.
1979 */
1980IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1981{
1982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1983 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1984#else
1985 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1986#endif
1987}
1988
1989
1990/**
1991 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1992 * to 16 bits.
1993 * @note Zero extending the value to 64-bit to simplify assembly.
1994 */
1995IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1996{
1997#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1998 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1999#else
2000 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2001#endif
2002}
2003
2004
2005/**
2006 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2007 * to 32 bits.
2008 * @note Zero extending the value to 64-bit to simplify assembly.
2009 */
2010IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2011{
2012#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2013 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2014#else
2015 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2016#endif
2017}
2018
2019
2020/**
2021 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2022 * to 64 bits.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2025{
2026#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2027 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2028#else
2029 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2030#endif
2031}
2032
2033
2034/**
2035 * Used by TB code to load unsigned 16-bit data w/ flat address.
2036 * @note Zero extending the value to 64-bit to simplify assembly.
2037 */
2038IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2039{
2040#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2041 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2042#else
2043 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2044#endif
2045}
2046
2047
2048/**
2049 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2050 * to 32 bits.
2051 * @note Zero extending the value to 64-bit to simplify assembly.
2052 */
2053IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2054{
2055#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2056 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2057#else
2058 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2059#endif
2060}
2061
2062
2063/**
2064 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2065 * to 64 bits.
2066 * @note Zero extending the value to 64-bit to simplify assembly.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to load unsigned 32-bit data w/ flat address.
2080 * @note Zero extending the value to 64-bit to simplify assembly.
2081 */
2082IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2083{
2084#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2085 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2086#else
2087 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2088#endif
2089}
2090
2091
2092/**
2093 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2094 * to 64 bits.
2095 * @note Zero extending the value to 64-bit to simplify assembly.
2096 */
2097IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2098{
2099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2100 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2101#else
2102 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2103#endif
2104}
2105
2106
2107/**
2108 * Used by TB code to load unsigned 64-bit data w/ flat address.
2109 */
2110IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2111{
2112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2113 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2114#else
2115 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2116#endif
2117}
2118
2119
2120/**
2121 * Used by TB code to store unsigned 8-bit data w/ flat address.
2122 */
2123IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2124{
2125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2126 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2127#else
2128 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2129#endif
2130}
2131
2132
2133/**
2134 * Used by TB code to store unsigned 16-bit data w/ flat address.
2135 */
2136IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2137{
2138#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2139 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2140#else
2141 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2142#endif
2143}
2144
2145
2146/**
2147 * Used by TB code to store unsigned 32-bit data w/ flat address.
2148 */
2149IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2150{
2151#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2152 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2153#else
2154 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2155#endif
2156}
2157
2158
2159/**
2160 * Used by TB code to store unsigned 64-bit data w/ flat address.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2165 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2166#else
2167 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2168#endif
2169}
2170
2171
2172
2173/**
2174 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2175 */
2176IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2177{
2178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2179 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2180#else
2181 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2182#endif
2183}
2184
2185
2186/**
2187 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2188 */
2189IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2190{
2191#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2192 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2193#else
2194 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2195#endif
2196}
2197
2198
2199/**
2200 * Used by TB code to store a segment selector value onto a flat stack.
2201 *
2202 * Intel CPUs don't write a whole dword, hence the special function.
2203 */
2204IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2205{
2206#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2207 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2208#else
2209 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2210#endif
2211}
2212
2213
2214/**
2215 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2216 */
2217IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2218{
2219#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2220 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2221#else
2222 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2223#endif
2224}
2225
2226
2227/**
2228 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2229 */
2230IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2231{
2232#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2233 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2234#else
2235 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2236#endif
2237}
2238
2239
2240/**
2241 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2242 */
2243IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2244{
2245#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2246 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2247#else
2248 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2249#endif
2250}
2251
2252
2253/**
2254 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2255 */
2256IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2257{
2258#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2259 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2260#else
2261 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2262#endif
2263}
2264
2265
2266
2267/*********************************************************************************************************************************
2268* Helpers: Segmented memory mapping. *
2269*********************************************************************************************************************************/
2270
2271/**
2272 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2273 * segmentation.
2274 */
2275IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2276 RTGCPTR GCPtrMem, uint8_t iSegReg))
2277{
2278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2279 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2280#else
2281 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2282#endif
2283}
2284
2285
2286/**
2287 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2288 */
2289IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2290 RTGCPTR GCPtrMem, uint8_t iSegReg))
2291{
2292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2293 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2294#else
2295 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2296#endif
2297}
2298
2299
2300/**
2301 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2302 */
2303IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2304 RTGCPTR GCPtrMem, uint8_t iSegReg))
2305{
2306#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2307 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2308#else
2309 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2310#endif
2311}
2312
2313
2314/**
2315 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2316 */
2317IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2318 RTGCPTR GCPtrMem, uint8_t iSegReg))
2319{
2320#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2321 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2322#else
2323 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2324#endif
2325}
2326
2327
2328/**
2329 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2330 * segmentation.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2333 RTGCPTR GCPtrMem, uint8_t iSegReg))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2336 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#else
2338 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2345 */
2346IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2347 RTGCPTR GCPtrMem, uint8_t iSegReg))
2348{
2349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2350 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2351#else
2352 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2353#endif
2354}
2355
2356
2357/**
2358 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2361 RTGCPTR GCPtrMem, uint8_t iSegReg))
2362{
2363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2364 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2365#else
2366 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2367#endif
2368}
2369
2370
2371/**
2372 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2373 */
2374IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2375 RTGCPTR GCPtrMem, uint8_t iSegReg))
2376{
2377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2378 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2379#else
2380 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2381#endif
2382}
2383
2384
2385/**
2386 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2387 * segmentation.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2390 RTGCPTR GCPtrMem, uint8_t iSegReg))
2391{
2392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2393 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2394#else
2395 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2396#endif
2397}
2398
2399
2400/**
2401 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2402 */
2403IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2404 RTGCPTR GCPtrMem, uint8_t iSegReg))
2405{
2406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2407 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2408#else
2409 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2410#endif
2411}
2412
2413
2414/**
2415 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2416 */
2417IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2418 RTGCPTR GCPtrMem, uint8_t iSegReg))
2419{
2420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2421 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2422#else
2423 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2424#endif
2425}
2426
2427
2428/**
2429 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2430 */
2431IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2432 RTGCPTR GCPtrMem, uint8_t iSegReg))
2433{
2434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2435 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2436#else
2437 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2438#endif
2439}
2440
2441
2442/**
2443 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2444 * segmentation.
2445 */
2446IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2447 RTGCPTR GCPtrMem, uint8_t iSegReg))
2448{
2449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2450 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2451#else
2452 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2453#endif
2454}
2455
2456
2457/**
2458 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2459 */
2460IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2461 RTGCPTR GCPtrMem, uint8_t iSegReg))
2462{
2463#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2464 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#else
2466 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#endif
2468}
2469
2470
2471/**
2472 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2475 RTGCPTR GCPtrMem, uint8_t iSegReg))
2476{
2477#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2478 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#else
2480 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#endif
2482}
2483
2484
2485/**
2486 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2487 */
2488IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2489 RTGCPTR GCPtrMem, uint8_t iSegReg))
2490{
2491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2492 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2493#else
2494 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2495#endif
2496}
2497
2498
2499/**
2500 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2501 */
2502IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2503 RTGCPTR GCPtrMem, uint8_t iSegReg))
2504{
2505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2506 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2507#else
2508 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2509#endif
2510}
2511
2512
2513/**
2514 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2515 */
2516IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2517 RTGCPTR GCPtrMem, uint8_t iSegReg))
2518{
2519#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2520 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2521#else
2522 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2523#endif
2524}
2525
2526
2527/**
2528 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2529 * segmentation.
2530 */
2531IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2532 RTGCPTR GCPtrMem, uint8_t iSegReg))
2533{
2534#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2535 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#else
2537 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#endif
2539}
2540
2541
2542/**
2543 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2544 */
2545IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2546 RTGCPTR GCPtrMem, uint8_t iSegReg))
2547{
2548#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2549 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2550#else
2551 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2552#endif
2553}
2554
2555
2556/**
2557 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2558 */
2559IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2560 RTGCPTR GCPtrMem, uint8_t iSegReg))
2561{
2562#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2563 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2564#else
2565 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2566#endif
2567}
2568
2569
2570/**
2571 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2572 */
2573IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2574 RTGCPTR GCPtrMem, uint8_t iSegReg))
2575{
2576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2577 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2578#else
2579 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2580#endif
2581}
2582
2583
2584/*********************************************************************************************************************************
2585* Helpers: Flat memory mapping. *
2586*********************************************************************************************************************************/
2587
2588/**
2589 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2590 * address.
2591 */
2592IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2593{
2594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2595 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2596#else
2597 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2598#endif
2599}
2600
2601
2602/**
2603 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2604 */
2605IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2606{
2607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2608 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2609#else
2610 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2611#endif
2612}
2613
2614
2615/**
2616 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2617 */
2618IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2619{
2620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2621 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2622#else
2623 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2624#endif
2625}
2626
2627
2628/**
2629 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2630 */
2631IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2632{
2633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2634 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2635#else
2636 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2637#endif
2638}
2639
2640
2641/**
2642 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2643 * address.
2644 */
2645IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2646{
2647#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2648 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2649#else
2650 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2651#endif
2652}
2653
2654
2655/**
2656 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2657 */
2658IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2659{
2660#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2661 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2662#else
2663 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2664#endif
2665}
2666
2667
2668/**
2669 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2670 */
2671IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2672{
2673#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2674 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2675#else
2676 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2677#endif
2678}
2679
2680
2681/**
2682 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2683 */
2684IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2685{
2686#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2687 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2688#else
2689 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2690#endif
2691}
2692
2693
2694/**
2695 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2696 * address.
2697 */
2698IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2699{
2700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2701 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2702#else
2703 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2704#endif
2705}
2706
2707
2708/**
2709 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2710 */
2711IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2715#else
2716 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2725{
2726#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2727 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2728#else
2729 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2730#endif
2731}
2732
2733
2734/**
2735 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2736 */
2737IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2738{
2739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2740 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2741#else
2742 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2743#endif
2744}
2745
2746
2747/**
2748 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2749 * address.
2750 */
2751IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2752{
2753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2754 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2755#else
2756 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2757#endif
2758}
2759
2760
2761/**
2762 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2763 */
2764IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2765{
2766#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2767 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2768#else
2769 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2770#endif
2771}
2772
2773
2774/**
2775 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2776 */
2777IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2778{
2779#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2780 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2781#else
2782 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2783#endif
2784}
2785
2786
2787/**
2788 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2789 */
2790IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2791{
2792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2793 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2794#else
2795 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2796#endif
2797}
2798
2799
2800/**
2801 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2802 */
2803IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2804{
2805#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2806 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2807#else
2808 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2809#endif
2810}
2811
2812
2813/**
2814 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2815 */
2816IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2817{
2818#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2819 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2820#else
2821 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2822#endif
2823}
2824
2825
2826/**
2827 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2828 * address.
2829 */
2830IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2831{
2832#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2833 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2834#else
2835 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2836#endif
2837}
2838
2839
2840/**
2841 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2842 */
2843IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2844{
2845#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2846 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2847#else
2848 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2849#endif
2850}
2851
2852
2853/**
2854 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2855 */
2856IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2857{
2858#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2859 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2860#else
2861 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2862#endif
2863}
2864
2865
2866/**
2867 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2868 */
2869IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2870{
2871#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2872 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2873#else
2874 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2875#endif
2876}
2877
2878
2879/*********************************************************************************************************************************
2880* Helpers: Commit, rollback & unmap *
2881*********************************************************************************************************************************/
2882
2883/**
2884 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2885 */
2886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2887{
2888 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2889}
2890
2891
2892/**
2893 * Used by TB code to commit and unmap a read-write memory mapping.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2896{
2897 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2898}
2899
2900
2901/**
2902 * Used by TB code to commit and unmap a write-only memory mapping.
2903 */
2904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2905{
2906 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2907}
2908
2909
2910/**
2911 * Used by TB code to commit and unmap a read-only memory mapping.
2912 */
2913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2914{
2915 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2916}
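/*
 * The map and commit/unmap helpers above are used in pairs: map, access the
 * returned pointer, then commit with the bUnmapInfo cookie filled in by the
 * map call.  A rough sketch of the sequence the generated code corresponds
 * to (flat 16-bit write-only case; variable names made up for the example):
 *
 * @code
 *  uint8_t   bUnmapInfo = 0;
 *  uint16_t *pu16Dst    = iemNativeHlpMemFlatMapDataU16Wo(pVCpu, &bUnmapInfo, GCPtrMem);
 *  *pu16Dst = u16Value;
 *  iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);
 * @endcode
 */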
2917
2918
2919/**
2920 * Reinitializes the native recompiler state.
2921 *
2922 * Called before starting a new recompile job.
2923 */
2924static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2925{
2926 pReNative->cLabels = 0;
2927 pReNative->bmLabelTypes = 0;
2928 pReNative->cFixups = 0;
2929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2930 pReNative->pDbgInfo->cEntries = 0;
2931#endif
2932 pReNative->pTbOrg = pTb;
2933 pReNative->cCondDepth = 0;
2934 pReNative->uCondSeqNo = 0;
2935 pReNative->uCheckIrqSeqNo = 0;
2936 pReNative->uTlbSeqNo = 0;
2937
2938#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2939 pReNative->Core.offPc = 0;
2940 pReNative->Core.cInstrPcUpdateSkipped = 0;
2941#endif
2942 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2943#if IEMNATIVE_HST_GREG_COUNT < 32
2944 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2945#endif
2946 ;
2947 pReNative->Core.bmHstRegsWithGstShadow = 0;
2948 pReNative->Core.bmGstRegShadows = 0;
2949 pReNative->Core.bmVars = 0;
2950 pReNative->Core.bmStack = 0;
2951 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2952 pReNative->Core.u64ArgVars = UINT64_MAX;
2953
2954 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2955 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2956 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2957 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2958 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2959 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2960 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2961 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2962 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2963 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2964 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2965 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2968
2969 /* Full host register reinit: */
2970 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2971 {
2972 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2973 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2974 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2975 }
2976
2977 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2978 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2979#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2980 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2981#endif
2982#ifdef IEMNATIVE_REG_FIXED_TMP0
2983 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2984#endif
2985#ifdef IEMNATIVE_REG_FIXED_TMP1
2986 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2987#endif
2988#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2989 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2990#endif
2991 );
2992 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2993 {
2994 fRegs &= ~RT_BIT_32(idxReg);
2995 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2996 }
2997
2998 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2999#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3000 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3001#endif
3002#ifdef IEMNATIVE_REG_FIXED_TMP0
3003 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3004#endif
3005#ifdef IEMNATIVE_REG_FIXED_TMP1
3006 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3007#endif
3008#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3009 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3010#endif
3011
3012#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3013# ifdef RT_ARCH_ARM64
3014 /*
3015 * Arm64 only has 32 128-bit registers; in order to support emulating 256-bit registers we statically
3016 * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3017 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
3018 * and the register allocator assumes that it will always be free when the lower one is picked.
3019 */
3020 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
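 /* 0xaaaaaaaa sets bits 1,3,5,...,31, i.e. the higher (odd numbered) register
    of each pair described above. */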
3021# else
3022 uint32_t const fFixedAdditional = 0;
3023# endif
3024
3025 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3026 | fFixedAdditional
3027# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3028 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3029# endif
3030 ;
3031 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3032 pReNative->Core.bmGstSimdRegShadows = 0;
3033 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3034 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3035
3036 /* Full host register reinit: */
3037 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3038 {
3039 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3040 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3041 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3042 }
3043
3044 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3045 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3046 {
3047 fRegs &= ~RT_BIT_32(idxReg);
3048 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3049 }
3050
3051#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3052 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3053#endif
3054
3055#endif
3056
3057 return pReNative;
3058}
3059
3060
3061/**
3062 * Allocates and initializes the native recompiler state.
3063 *
3064 * This is called the first time an EMT wants to recompile something.
3065 *
3066 * @returns Pointer to the new recompiler state.
3067 * @param pVCpu The cross context virtual CPU structure of the calling
3068 * thread.
3069 * @param pTb The TB that's about to be recompiled.
3070 * @thread EMT(pVCpu)
3071 */
3072static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3073{
3074 VMCPU_ASSERT_EMT(pVCpu);
3075
3076 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3077 AssertReturn(pReNative, NULL);
3078
3079 /*
3080 * Try allocate all the buffers and stuff we need.
3081 */
3082 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3083 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3084 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3086 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3087#endif
3088 if (RT_LIKELY( pReNative->pInstrBuf
3089 && pReNative->paLabels
3090 && pReNative->paFixups)
3091#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3092 && pReNative->pDbgInfo
3093#endif
3094 )
3095 {
3096 /*
3097 * Set the buffer & array sizes on success.
3098 */
3099 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3100 pReNative->cLabelsAlloc = _8K;
3101 pReNative->cFixupsAlloc = _16K;
3102#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3103 pReNative->cDbgInfoAlloc = _16K;
3104#endif
3105
3106 /* Other constant stuff: */
3107 pReNative->pVCpu = pVCpu;
3108
3109 /*
3110 * Done, just need to save it and reinit it.
3111 */
3112 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3113 return iemNativeReInit(pReNative, pTb);
3114 }
3115
3116 /*
3117 * Failed. Cleanup and return.
3118 */
3119 AssertFailed();
3120 RTMemFree(pReNative->pInstrBuf);
3121 RTMemFree(pReNative->paLabels);
3122 RTMemFree(pReNative->paFixups);
3123#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3124 RTMemFree(pReNative->pDbgInfo);
3125#endif
3126 RTMemFree(pReNative);
3127 return NULL;
3128}
3129
3130
3131/**
3132 * Creates a label.
3133 *
3134 * If the label does not yet have a defined position,
3135 * call iemNativeLabelDefine() later to set it.
3136 *
3137 * @returns Label ID. Throws VBox status code on failure, so no need to check
3138 * the return value.
3139 * @param pReNative The native recompile state.
3140 * @param enmType The label type.
3141 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3142 * label is not yet defined (default).
3143 * @param uData Data associated with the label. Only applicable to
3144 * certain types of labels. Default is zero.
3145 */
3146DECL_HIDDEN_THROW(uint32_t)
3147iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3148 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3149{
3150 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3151
3152 /*
3153 * Locate existing label definition.
3154 *
3155 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3156 * and uData is zero.
3157 */
3158 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3159 uint32_t const cLabels = pReNative->cLabels;
3160 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3161#ifndef VBOX_STRICT
3162 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3163 && offWhere == UINT32_MAX
3164 && uData == 0
3165#endif
3166 )
3167 {
3168#ifndef VBOX_STRICT
3169 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3170 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3171 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3172 if (idxLabel < pReNative->cLabels)
3173 return idxLabel;
3174#else
3175 for (uint32_t i = 0; i < cLabels; i++)
3176 if ( paLabels[i].enmType == enmType
3177 && paLabels[i].uData == uData)
3178 {
3179 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3180 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3182 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3183 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3184 return i;
3185 }
3186 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3187 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3188#endif
3189 }
3190
3191 /*
3192 * Make sure we've got room for another label.
3193 */
3194 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3195 { /* likely */ }
3196 else
3197 {
3198 uint32_t cNew = pReNative->cLabelsAlloc;
3199 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3200 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3201 cNew *= 2;
3202 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* the IEMNATIVEFIXUP::idxLabel type restricts this */
3203 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3204 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3205 pReNative->paLabels = paLabels;
3206 pReNative->cLabelsAlloc = cNew;
3207 }
3208
3209 /*
3210 * Define a new label.
3211 */
3212 paLabels[cLabels].off = offWhere;
3213 paLabels[cLabels].enmType = enmType;
3214 paLabels[cLabels].uData = uData;
3215 pReNative->cLabels = cLabels + 1;
3216
3217 Assert((unsigned)enmType < 64);
3218 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3219
3220 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3221 {
3222 Assert(uData == 0);
3223 pReNative->aidxUniqueLabels[enmType] = cLabels;
3224 }
3225
3226 if (offWhere != UINT32_MAX)
3227 {
3228#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3229 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3230 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3231#endif
3232 }
3233 return cLabels;
3234}
3235
3236
3237/**
3238 * Defines the location of an existing label.
3239 *
3240 * @param pReNative The native recompile state.
3241 * @param idxLabel The label to define.
3242 * @param offWhere The position.
3243 */
3244DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3245{
3246 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3247 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3248 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3249 pLabel->off = offWhere;
3250#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3251 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3252 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3253#endif
3254}
3255
3256
3257/**
3258 * Looks up a label.
3259 *
3260 * @returns Label ID if found, UINT32_MAX if not.
3261 */
3262static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3263 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3264{
3265 Assert((unsigned)enmType < 64);
3266 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3267 {
3268 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3269 return pReNative->aidxUniqueLabels[enmType];
3270
3271 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3272 uint32_t const cLabels = pReNative->cLabels;
3273 for (uint32_t i = 0; i < cLabels; i++)
3274 if ( paLabels[i].enmType == enmType
3275 && paLabels[i].uData == uData
3276 && ( paLabels[i].off == offWhere
3277 || offWhere == UINT32_MAX
3278 || paLabels[i].off == UINT32_MAX))
3279 return i;
3280 }
3281 return UINT32_MAX;
3282}
3283
3284
3285/**
3286 * Adds a fixup.
3287 *
3288 * @throws VBox status code (int) on failure.
3289 * @param pReNative The native recompile state.
3290 * @param offWhere The instruction offset of the fixup location.
3291 * @param idxLabel The target label ID for the fixup.
3292 * @param enmType The fixup type.
3293 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3294 */
3295DECL_HIDDEN_THROW(void)
3296iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3297 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3298{
3299 Assert(idxLabel <= UINT16_MAX);
3300 Assert((unsigned)enmType <= UINT8_MAX);
3301
3302 /*
3303 * Make sure we've room.
3304 */
3305 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3306 uint32_t const cFixups = pReNative->cFixups;
3307 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3308 { /* likely */ }
3309 else
3310 {
3311 uint32_t cNew = pReNative->cFixupsAlloc;
3312 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3313 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3314 cNew *= 2;
3315 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3316 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3317 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3318 pReNative->paFixups = paFixups;
3319 pReNative->cFixupsAlloc = cNew;
3320 }
3321
3322 /*
3323 * Add the fixup.
3324 */
3325 paFixups[cFixups].off = offWhere;
3326 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3327 paFixups[cFixups].enmType = enmType;
3328 paFixups[cFixups].offAddend = offAddend;
3329 pReNative->cFixups = cFixups + 1;
3330}
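
/*
 * Illustrative usage sketch (not part of the recompiler proper) showing how the label
 * and fixup helpers above fit together: create a forward label, emit the branch that
 * needs patching, record a fixup for it, and define the label once its native offset
 * is known.  The enmSomeLabelType and enmSomeFixupType values below are placeholders,
 * not necessarily real enum members.
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *      // ... emit the branch instruction at 'off' that targets the label ...
 *      iemNativeAddFixup(pReNative, off, idxLabel, enmSomeFixupType);
 *      // ... later, once the branch target position is reached:
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */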
3331
3332
3333/**
3334 * Slow code path for iemNativeInstrBufEnsure.
3335 */
3336DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3337{
3338 /* Double the buffer size till we meet the request. */
3339 uint32_t cNew = pReNative->cInstrBufAlloc;
3340 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3341 do
3342 cNew *= 2;
3343 while (cNew < off + cInstrReq);
3344
3345 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3346#ifdef RT_ARCH_ARM64
3347 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3348#else
3349 uint32_t const cbMaxInstrBuf = _2M;
3350#endif
3351 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3352
3353 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3354 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3355
3356#ifdef VBOX_STRICT
3357 pReNative->offInstrBufChecked = off + cInstrReq;
3358#endif
3359 pReNative->cInstrBufAlloc = cNew;
3360 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3361}
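
/*
 * Worked example of the doubling above (illustrative numbers only): with
 * cInstrBufAlloc = 16384 entries and a request covering off + cInstrReq = 40000
 * entries, the loop doubles 16384 -> 32768 -> 65536 and stops since 65536 >= 40000;
 * the buffer is then reallocated to 65536 * sizeof(IEMNATIVEINSTR) bytes, provided
 * that stays below the architecture specific cap.
 */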
3362
3363#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3364
3365/**
3366 * Grows the static debug info array used during recompilation.
3367 *
3368 * @returns Pointer to the new debug info block; throws VBox status code on
3369 * failure, so no need to check the return value.
3370 */
3371DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3372{
3373 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3374 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3375 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3376 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3377 pReNative->pDbgInfo = pDbgInfo;
3378 pReNative->cDbgInfoAlloc = cNew;
3379 return pDbgInfo;
3380}
3381
3382
3383/**
3384 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3385 */
3386DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3387{
3388 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3389 { /* likely */ }
3390 else
3391 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3392 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3393}
3394
3395
3396/**
3397 * Debug Info: Adds a native offset record, if necessary.
3398 */
3399DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3400{
3401 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3402
3403 /*
3404 * Search backwards to see if we've got a similar record already.
3405 */
3406 uint32_t idx = pDbgInfo->cEntries;
3407 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3408 while (idx-- > idxStop)
3409 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3410 {
3411 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3412 return;
3413 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3414 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3415 break;
3416 }
3417
3418 /*
3419 * Add it.
3420 */
3421 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3422 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3423 pEntry->NativeOffset.offNative = off;
3424}
3425
3426
3427/**
3428 * Debug Info: Record info about a label.
3429 */
3430static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3431{
3432 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3433 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3434 pEntry->Label.uUnused = 0;
3435 pEntry->Label.enmLabel = (uint8_t)enmType;
3436 pEntry->Label.uData = uData;
3437}
3438
3439
3440/**
3441 * Debug Info: Record info about a threaded call.
3442 */
3443static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3444{
3445 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3446 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3447 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3448 pEntry->ThreadedCall.uUnused = 0;
3449 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3450}
3451
3452
3453/**
3454 * Debug Info: Record info about a new guest instruction.
3455 */
3456static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3457{
3458 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3459 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3460 pEntry->GuestInstruction.uUnused = 0;
3461 pEntry->GuestInstruction.fExec = fExec;
3462}
3463
3464
3465/**
3466 * Debug Info: Record info about guest register shadowing.
3467 */
3468DECL_HIDDEN_THROW(void)
3469iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3470 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3471{
3472 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3473 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3474 pEntry->GuestRegShadowing.uUnused = 0;
3475 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3476 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3477 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3478}
3479
3480
3481# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3482/**
3483 * Debug Info: Record info about guest SIMD register shadowing.
3484 */
3485DECL_HIDDEN_THROW(void)
3486iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3487 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3488{
3489 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3490 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3491 pEntry->GuestSimdRegShadowing.uUnused = 0;
3492 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3493 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3494 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3495}
3496# endif
3497
3498
3499# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3500/**
3501 * Debug Info: Record info about delayed RIP updates.
3502 */
3503DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3504{
3505 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3506 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3507 pEntry->DelayedPcUpdate.offPc = offPc;
3508 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3509}
3510# endif
3511
3512#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3513
3514
3515/*********************************************************************************************************************************
3516* Register Allocator *
3517*********************************************************************************************************************************/
3518
3519/**
3520 * Register parameter indexes (indexed by argument number).
3521 */
3522DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3523{
3524 IEMNATIVE_CALL_ARG0_GREG,
3525 IEMNATIVE_CALL_ARG1_GREG,
3526 IEMNATIVE_CALL_ARG2_GREG,
3527 IEMNATIVE_CALL_ARG3_GREG,
3528#if defined(IEMNATIVE_CALL_ARG4_GREG)
3529 IEMNATIVE_CALL_ARG4_GREG,
3530# if defined(IEMNATIVE_CALL_ARG5_GREG)
3531 IEMNATIVE_CALL_ARG5_GREG,
3532# if defined(IEMNATIVE_CALL_ARG6_GREG)
3533 IEMNATIVE_CALL_ARG6_GREG,
3534# if defined(IEMNATIVE_CALL_ARG7_GREG)
3535 IEMNATIVE_CALL_ARG7_GREG,
3536# endif
3537# endif
3538# endif
3539#endif
3540};
3541AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3542
3543/**
3544 * Call register masks indexed by argument count.
3545 */
3546DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3547{
3548 0,
3549 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3550 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3551 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3552 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3553 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3554#if defined(IEMNATIVE_CALL_ARG4_GREG)
3555 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3556 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3557# if defined(IEMNATIVE_CALL_ARG5_GREG)
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3559 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3560# if defined(IEMNATIVE_CALL_ARG6_GREG)
3561 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3562 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3564# if defined(IEMNATIVE_CALL_ARG7_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3567 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3568# endif
3569# endif
3570# endif
3571#endif
3572};
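
/*
 * Illustrative note: g_afIemNativeCallRegs[3], for instance, is simply the OR of
 * RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG), RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) and
 * RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG), allowing iemNativeRegAllocArgs() below to test
 * and mark all registers needed for a call with a given argument count in one go.
 */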
3573
3574#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3575/**
3576 * BP offset of the stack argument slots.
3577 *
3578 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3579 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3580 */
3581DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3582{
3583 IEMNATIVE_FP_OFF_STACK_ARG0,
3584# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3585 IEMNATIVE_FP_OFF_STACK_ARG1,
3586# endif
3587# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3588 IEMNATIVE_FP_OFF_STACK_ARG2,
3589# endif
3590# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3591 IEMNATIVE_FP_OFF_STACK_ARG3,
3592# endif
3593};
3594AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3595#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3596
3597/**
3598 * Info about shadowed guest register values.
3599 * @see IEMNATIVEGSTREG
3600 */
3601DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3602{
3603#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3604 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3605 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3606 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3607 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3608 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3609 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3610 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3611 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3612 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3613 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3614 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3615 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3616 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3617 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3618 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3620 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3621 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3622 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3623 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3624 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3625 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3626 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3627 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3628 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3629 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3630 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3631 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3632 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3633 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3634 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3635 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3636 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3637 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3638 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3639 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3640 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3641 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3642 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3643 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3644 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3645 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3646 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3647 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3648 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3649 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3650 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3651 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3652#undef CPUMCTX_OFF_AND_SIZE
3653};
3654AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3655
3656
3657/** Host CPU general purpose register names. */
3658DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3659{
3660#ifdef RT_ARCH_AMD64
3661 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3662#elif defined(RT_ARCH_ARM64)
3663 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3664 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3665#else
3666# error "port me"
3667#endif
3668};
3669
3670
3671#if 0 /* unused */
3672/**
3673 * Tries to locate a suitable register in the given register mask.
3674 *
3675 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3676 * failed.
3677 *
3678 * @returns Host register number on success, returns UINT8_MAX on failure.
3679 */
3680static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3681{
3682 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3683 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3684 if (fRegs)
3685 {
3686 /** @todo pick better here: */
3687 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3688
3689 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3690 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3691 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3692 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3693
3694 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3695 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3696 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3697 return idxReg;
3698 }
3699 return UINT8_MAX;
3700}
3701#endif /* unused */
3702
3703
3704/**
3705 * Locate a register, possibly freeing one up.
3706 *
3707 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3708 * failed.
3709 *
3710 * @returns Host register number on success. Returns UINT8_MAX if no registers
3711 * were found; the caller is supposed to deal with this and raise an
3712 * allocation type specific status code (if desired).
3713 *
3714 * @throws VBox status code if we run into trouble spilling a variable or
3715 * recording debug info. Does NOT throw anything if we're out of
3716 * registers, though.
3717 */
3718static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3719 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3720{
3721 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3722 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3723 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3724
3725 /*
3726 * Try a freed register that's shadowing a guest register.
3727 */
3728 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3729 if (fRegs)
3730 {
3731 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3732
3733#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3734 /*
3735 * When we have liveness information, we use it to kick out all shadowed
3736 * guest registers that will not be needed any more in this TB. If we're
3737 * lucky, this may prevent us from ending up here again.
3738 *
3739 * Note! We must consider the previous entry here so we don't free
3740 * anything that the current threaded function requires (current
3741 * entry is produced by the next threaded function).
3742 */
3743 uint32_t const idxCurCall = pReNative->idxCurCall;
3744 if (idxCurCall > 0)
3745 {
3746 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3747
3748# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3749 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3750 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3751 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
3752# else
3753 /* Construct a mask of the registers not in the read or write state.
3754 Note! We could skip writes, if they aren't from us, as this is just
3755 a hack to prevent trashing registers that have just been written
3756 or will be written when we retire the current instruction. */
3757 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3758 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3759 & IEMLIVENESSBIT_MASK;
3760# endif
3761 /* Merge EFLAGS. */
3762 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3763 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3764 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3765 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3766 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3767
3768 /* If it matches any shadowed registers. */
3769 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3770 {
3771 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3772 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3773 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3774
3775 /* See if we've got any unshadowed registers we can return now. */
3776 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3777 if (fUnshadowedRegs)
3778 {
3779 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3780 return (fPreferVolatile
3781 ? ASMBitFirstSetU32(fUnshadowedRegs)
3782 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3783 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3784 - 1;
3785 }
3786 }
3787 }
3788#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3789
3790 unsigned const idxReg = (fPreferVolatile
3791 ? ASMBitFirstSetU32(fRegs)
3792 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3793 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3794 - 1;
3795
3796 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3797 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3798 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3799 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3800
3801 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3802 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3803 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3804 return idxReg;
3805 }
3806
3807 /*
3808 * Try free up a variable that's in a register.
3809 *
3810 * We do two rounds here, first evacuating variables we don't need to be
3811 * saved on the stack, then in the second round move things to the stack.
3812 */
3813 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3814 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3815 {
3816 uint32_t fVars = pReNative->Core.bmVars;
3817 while (fVars)
3818 {
3819 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3820 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3821 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3822 && (RT_BIT_32(idxReg) & fRegMask)
3823 && ( iLoop == 0
3824 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3825 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3826 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3827 {
3828 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3829 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3830 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3831 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3832 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3833 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3834
3835 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3836 {
3837 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3838 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3839 }
3840
3841 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3842 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3843
3844 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3845 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3846 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3847 return idxReg;
3848 }
3849 fVars &= ~RT_BIT_32(idxVar);
3850 }
3851 }
3852
3853 return UINT8_MAX;
3854}
3855
3856
3857/**
3858 * Reassigns a variable to a different register specified by the caller.
3859 *
3860 * @returns The new code buffer position.
3861 * @param pReNative The native recompile state.
3862 * @param off The current code buffer position.
3863 * @param idxVar The variable index.
3864 * @param idxRegOld The old host register number.
3865 * @param idxRegNew The new host register number.
3866 * @param pszCaller The caller for logging.
3867 */
3868static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3869 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3870{
3871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3872 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3873 RT_NOREF(pszCaller);
3874
3875 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3876
3877 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3878 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3879 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3880 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3881
3882 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3883 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3884 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3885 if (fGstRegShadows)
3886 {
3887 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3888 | RT_BIT_32(idxRegNew);
3889 while (fGstRegShadows)
3890 {
3891 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3892 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3893
3894 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3895 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3896 }
3897 }
3898
3899 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3900 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3901 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3902 return off;
3903}
3904
3905
3906/**
3907 * Moves a variable to a different register or spills it onto the stack.
3908 *
3909 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3910 * kinds can easily be recreated if needed later.
3911 *
3912 * @returns The new code buffer position.
3913 * @param pReNative The native recompile state.
3914 * @param off The current code buffer position.
3915 * @param idxVar The variable index.
3916 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3917 * call-volatile registers.
3918 */
3919DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3920 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3921{
3922 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3923 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3924 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3925 Assert(!pVar->fRegAcquired);
3926
3927 uint8_t const idxRegOld = pVar->idxReg;
3928 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3929 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3930 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3931 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3932 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3933 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3934 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3935 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3936
3937
3938 /** @todo Add statistics on this.*/
3939 /** @todo Implement basic variable liveness analysis (python) so variables
3940 * can be freed immediately once no longer used. Without it we risk wasting
3941 * registers and stack space on dead variables.
3942 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3943
3944 /*
3945 * First try move it to a different register, as that's cheaper.
3946 */
3947 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3948 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3949 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3950 if (fRegs)
3951 {
3952 /* Avoid using shadow registers, if possible. */
3953 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3954 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3955 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3956 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3957 }
3958
3959 /*
3960 * Otherwise we must spill the register onto the stack.
3961 */
3962 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3963 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3964 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3965 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3966
3967 pVar->idxReg = UINT8_MAX;
3968 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3969 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3970 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3971 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3972 return off;
3973}
3974
3975
3976/**
3977 * Allocates a temporary host general purpose register.
3978 *
3979 * This may emit code to save register content onto the stack in order to free
3980 * up a register.
3981 *
3982 * @returns The host register number; throws VBox status code on failure,
3983 * so no need to check the return value.
3984 * @param pReNative The native recompile state.
3985 * @param poff Pointer to the variable with the code buffer position.
3986 * This will be updated if we need to move a variable from
3987 * register to stack in order to satisfy the request.
3988 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3989 * registers (@c true, default) or the other way around
3990 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3991 */
3992DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3993{
3994 /*
3995 * Try find a completely unused register, preferably a call-volatile one.
3996 */
3997 uint8_t idxReg;
3998 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3999 & ~pReNative->Core.bmHstRegsWithGstShadow
4000 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4001 if (fRegs)
4002 {
4003 if (fPreferVolatile)
4004 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4005 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4006 else
4007 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4008 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4009 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4010 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4011 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4012 }
4013 else
4014 {
4015 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4016 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4017 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4018 }
4019 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4020}
4021
4022
4023/**
4024 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
4025 * registers.
4026 *
4027 * @returns The host register number; throws VBox status code on failure,
4028 * so no need to check the return value.
4029 * @param pReNative The native recompile state.
4030 * @param poff Pointer to the variable with the code buffer position.
4031 * This will be updated if we need to move a variable from
4032 * register to stack in order to satisfy the request.
4033 * @param fRegMask Mask of acceptable registers.
4034 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4035 * registers (@c true, default) or the other way around
4036 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4037 */
4038DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4039 bool fPreferVolatile /*= true*/)
4040{
4041 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4042 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4043
4044 /*
4045 * Try find a completely unused register, preferably a call-volatile one.
4046 */
4047 uint8_t idxReg;
4048 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4049 & ~pReNative->Core.bmHstRegsWithGstShadow
4050 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4051 & fRegMask;
4052 if (fRegs)
4053 {
4054 if (fPreferVolatile)
4055 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4056 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4057 else
4058 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4059 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4060 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4061 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4062 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4063 }
4064 else
4065 {
4066 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4067 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4068 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4069 }
4070 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4071}
4072
4073
4074/**
4075 * Allocates a temporary register for loading an immediate value into.
4076 *
4077 * This will emit code to load the immediate, unless there happens to be an
4078 * unused register with the value already loaded.
4079 *
4080 * The caller will not modify the returned register, it must be considered
4081 * read-only. Free using iemNativeRegFreeTmpImm.
4082 *
4083 * @returns The host register number; throws VBox status code on failure, so no
4084 * need to check the return value.
4085 * @param pReNative The native recompile state.
4086 * @param poff Pointer to the variable with the code buffer position.
4087 * @param uImm The immediate value that the register must hold upon
4088 * return.
4089 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4090 * registers (@c true, default) or the other way around
4091 * (@c false).
4092 *
4093 * @note Reusing immediate values has not been implemented yet.
4094 */
4095DECL_HIDDEN_THROW(uint8_t)
4096iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4097{
4098 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4099 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4100 return idxReg;
4101}
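
/*
 * Illustrative usage sketch (assumptions, not recompiler code): load a constant into a
 * temporary register, emit instructions that read it, and release it again with
 * iemNativeRegFreeTmpImm() as prescribed by the doc comment above (signature assumed).
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeef));
 *      // ... emit instructions using idxTmpReg as a read-only source ...
 *      iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
 */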
4102
4103
4104/**
4105 * Allocates a temporary host general purpose register for keeping a guest
4106 * register value.
4107 *
4108 * Since we may already have a register holding the guest register value,
4109 * code will be emitted to do the loading if that's not the case. Code may also
4110 * be emitted if we have to free up a register to satisfy the request.
4111 *
4112 * @returns The host register number; throws VBox status code on failure, so no
4113 * need to check the return value.
4114 * @param pReNative The native recompile state.
4115 * @param poff Pointer to the variable with the code buffer
4116 * position. This will be updated if we need to move a
4117 * variable from register to stack in order to satisfy
4118 * the request.
4119 * @param enmGstReg The guest register that is to be updated.
4120 * @param enmIntendedUse How the caller will be using the host register.
4121 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4122 * register is okay (default). The ASSUMPTION here is
4123 * that the caller has already flushed all volatile
4124 * registers, so this is only applied if we allocate a
4125 * new register.
4126 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4127 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4128 */
4129DECL_HIDDEN_THROW(uint8_t)
4130iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4131 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4132 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4133{
4134 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4135#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4136 AssertMsg( fSkipLivenessAssert
4137 || pReNative->idxCurCall == 0
4138 || enmGstReg == kIemNativeGstReg_Pc
4139 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4140 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4141 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4142 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4143 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4144 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4145#endif
4146 RT_NOREF(fSkipLivenessAssert);
4147#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4148 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4149#endif
4150 uint32_t const fRegMask = !fNoVolatileRegs
4151 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4152 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4153
4154 /*
4155 * First check if the guest register value is already in a host register.
4156 */
4157 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4158 {
4159 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4160 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4161 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4162 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4163
4164 /* It's not supposed to be allocated... */
4165 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4166 {
4167 /*
4168 * If the register will trash the guest shadow copy, try find a
4169 * completely unused register we can use instead. If that fails,
4170 * we need to disassociate the host reg from the guest reg.
4171 */
4172 /** @todo would be nice to know if preserving the register is in any way helpful. */
4173 /* If the purpose is calculations, try duplicate the register value as
4174 we'll be clobbering the shadow. */
4175 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4176 && ( ~pReNative->Core.bmHstRegs
4177 & ~pReNative->Core.bmHstRegsWithGstShadow
4178 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4179 {
4180 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4181
4182 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4183
4184 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4185 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4186 g_apszIemNativeHstRegNames[idxRegNew]));
4187 idxReg = idxRegNew;
4188 }
4189 /* If the current register matches the restrictions, go ahead and allocate
4190 it for the caller. */
4191 else if (fRegMask & RT_BIT_32(idxReg))
4192 {
4193 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4194 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4195 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4196 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4197 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4198 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4199 else
4200 {
4201 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4202 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4203 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4204 }
4205 }
4206 /* Otherwise, allocate a register that satisfies the caller and transfer
4207 the shadowing if compatible with the intended use. (This basically
4208 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4209 else
4210 {
4211 Assert(fNoVolatileRegs);
4212 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4213 !fNoVolatileRegs
4214 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4215 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4216 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4217 {
4218 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4219 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4220 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4221 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4222 }
4223 else
4224 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4225 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4226 g_apszIemNativeHstRegNames[idxRegNew]));
4227 idxReg = idxRegNew;
4228 }
4229 }
4230 else
4231 {
4232 /*
4233 * Oops. Shadowed guest register already allocated!
4234 *
4235 * Allocate a new register, copy the value and, if updating, the
4236 * guest shadow copy assignment to the new register.
4237 */
4238 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4239 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4240 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4241 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4242
4243 /** @todo share register for readonly access. */
4244 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4245 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4246
4247 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4248 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4249
4250 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4251 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4252 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4253 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4254 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4255 else
4256 {
4257 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4258 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4259 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4260 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4261 }
4262 idxReg = idxRegNew;
4263 }
4264 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4265
4266#ifdef VBOX_STRICT
4267 /* Strict builds: Check that the value is correct. */
4268 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4269#endif
4270
4271 return idxReg;
4272 }
4273
4274 /*
4275 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4276 */
4277 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4278
4279 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4280 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4281
4282 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4283 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4284 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4285 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4286
4287 return idxRegNew;
4288}
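
/*
 * Illustrative sketch (not actual recompiler code) of a typical call pattern for the
 * allocator above: grab the host register shadowing guest RAX for updating, emit the
 * required arithmetic on it, and rely on the shadow/flush machinery (not shown here)
 * to write the result back to the guest context later.
 *
 *      uint8_t const idxGprReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying idxGprReg here ...
 */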
4289
4290
4291/**
4292 * Allocates a temporary host general purpose register that already holds the
4293 * given guest register value.
4294 *
4295 * The use case for this function is places where the shadowing state cannot be
4296 * modified due to branching and such. This will fail if we don't have a
4297 * current shadow copy handy or if it's incompatible. The only code that will
4298 * be emitted here is value checking code in strict builds.
4299 *
4300 * The intended use can only be readonly!
4301 *
4302 * @returns The host register number, UINT8_MAX if not present.
4303 * @param pReNative The native recompile state.
4304 * @param poff Pointer to the instruction buffer offset.
4305 * Will be updated in strict builds if a register is
4306 * found.
4307 * @param enmGstReg The guest register that is to be read.
4308 * @note In strict builds, this may throw instruction buffer growth failures.
4309 * Non-strict builds will not throw anything.
4310 * @sa iemNativeRegAllocTmpForGuestReg
4311 */
4312DECL_HIDDEN_THROW(uint8_t)
4313iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4314{
4315 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4316#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4317 AssertMsg( pReNative->idxCurCall == 0
4318 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4319 || enmGstReg == kIemNativeGstReg_Pc,
4320 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4321#endif
4322
4323 /*
4324 * First check if the guest register value is already in a host register.
4325 */
4326 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4327 {
4328 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4329 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4330 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4331 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4332
4333 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4334 {
4335 /*
4336 * We only do readonly use here, so easy compared to the other
4337 * variant of this code.
4338 */
4339 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4340 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4341 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4342 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4343 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4344
4345#ifdef VBOX_STRICT
4346 /* Strict builds: Check that the value is correct. */
4347 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4348#else
4349 RT_NOREF(poff);
4350#endif
4351 return idxReg;
4352 }
4353 }
4354
4355 return UINT8_MAX;
4356}
4357
4358
4359/**
4360 * Allocates argument registers for a function call.
4361 *
4362 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4363 * need to check the return value.
4364 * @param pReNative The native recompile state.
4365 * @param off The current code buffer offset.
4366 * @param cArgs The number of arguments the function call takes.
4367 */
4368DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4369{
4370 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4371 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4372 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4373 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4374
4375 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4376 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4377 else if (cArgs == 0)
4378 return off;
4379
4380 /*
4381 * Are we lucky and all the registers are free and not shadowing anything?
4382 */
4383 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4384 for (uint32_t i = 0; i < cArgs; i++)
4385 {
4386 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4387 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4388 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4389 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4390 }
4391 /*
4392 * Okay, not lucky so we have to free up the registers.
4393 */
4394 else
4395 for (uint32_t i = 0; i < cArgs; i++)
4396 {
4397 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4398 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4399 {
4400 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4401 {
4402 case kIemNativeWhat_Var:
4403 {
4404 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4406 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4407 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4408 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4409
4410 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4411 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4412 else
4413 {
4414 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4415 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4416 }
4417 break;
4418 }
4419
4420 case kIemNativeWhat_Tmp:
4421 case kIemNativeWhat_Arg:
4422 case kIemNativeWhat_rc:
4423 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4424 default:
4425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4426 }
4427
4428 }
4429 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4430 {
4431 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4432 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4433 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4434 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4435 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4436 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4437 }
4438 else
4439 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4440 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4441 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4442 }
4443 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4444 return off;
4445}
4446
4447
4448DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4449
4450
4451#if 0
4452/**
4453 * Frees a register assignment of any type.
4454 *
4455 * @param pReNative The native recompile state.
4456 * @param idxHstReg The register to free.
4457 *
4458 * @note Does not update variables.
4459 */
4460DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4461{
4462 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4463 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4464 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4465 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4466 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4467 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4468 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4469 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4470 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4471 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4472 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4473 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4474 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4475 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4476
4477 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4478 /* no flushing, right:
4479 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4480 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4481 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4482 */
4483}
4484#endif
4485
4486
4487/**
4488 * Frees a temporary register.
4489 *
4490 * Any shadow copies of guest registers assigned to the host register will not
4491 * be flushed by this operation.
4492 */
4493DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4494{
4495 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4496 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4497 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4498 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4499 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4500}
4501
4502
4503/**
4504 * Frees a temporary immediate register.
4505 *
4506 * It is assumed that the call has not modified the register, so it still holds
4507 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4508 */
4509DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4510{
4511 iemNativeRegFreeTmp(pReNative, idxHstReg);
4512}
4513
4514
4515/**
4516 * Frees a register assigned to a variable.
4517 *
4518 * The register will be disassociated from the variable.
4519 */
4520DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4521{
4522 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4523 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4524 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4526 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4527
4528 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4529 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4530 if (!fFlushShadows)
4531 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4532 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4533 else
4534 {
4535 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4536 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4537 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4538 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4539 uint64_t fGstRegShadows = fGstRegShadowsOld;
4540 while (fGstRegShadows)
4541 {
4542 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4543 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4544
4545 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4546 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4547 }
4548 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4549 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4550 }
4551}
4552
4553
4554/**
4555 * Called right before emitting a call instruction to move anything important
4556 * out of call-volatile registers, free and flush the call-volatile registers,
4557 * optionally freeing argument variables.
4558 *
4559 * @returns New code buffer offset; throws VBox status code on failure.
4560 * @param pReNative The native recompile state.
4561 * @param off The code buffer offset.
4562 * @param cArgs The number of arguments the function call takes.
4563 * It is presumed that the host registers for these have
4564 * already been allocated as such and won't need moving,
4565 * just freeing.
4566 * @param fKeepVars Mask of variables that should keep their register
4567 * assignments. Caller must take care to handle these.
4568 */
4569DECL_HIDDEN_THROW(uint32_t)
4570iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4571{
4572 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4573
4574 /* fKeepVars will reduce this mask. */
4575 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4576
4577 /*
4578 * Move anything important out of volatile registers.
4579 */
4580 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4581 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4582 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4583#ifdef IEMNATIVE_REG_FIXED_TMP0
4584 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4585#endif
4586#ifdef IEMNATIVE_REG_FIXED_TMP1
4587 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4588#endif
4589#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4590 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4591#endif
4592 & ~g_afIemNativeCallRegs[cArgs];
4593
4594 fRegsToMove &= pReNative->Core.bmHstRegs;
4595 if (!fRegsToMove)
4596 { /* likely */ }
4597 else
4598 {
4599 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4600 while (fRegsToMove != 0)
4601 {
4602 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4603 fRegsToMove &= ~RT_BIT_32(idxReg);
4604
4605 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4606 {
4607 case kIemNativeWhat_Var:
4608 {
4609 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4610 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4611 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4612 Assert(pVar->idxReg == idxReg);
4613 if (!(RT_BIT_32(idxVar) & fKeepVars))
4614 {
4615 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4616 idxVar, pVar->enmKind, pVar->idxReg));
4617 if (pVar->enmKind != kIemNativeVarKind_Stack)
4618 pVar->idxReg = UINT8_MAX;
4619 else
4620 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4621 }
4622 else
4623 fRegsToFree &= ~RT_BIT_32(idxReg);
4624 continue;
4625 }
4626
4627 case kIemNativeWhat_Arg:
4628 AssertMsgFailed(("What?!?: %u\n", idxReg));
4629 continue;
4630
4631 case kIemNativeWhat_rc:
4632 case kIemNativeWhat_Tmp:
4633 AssertMsgFailed(("Missing free: %u\n", idxReg));
4634 continue;
4635
4636 case kIemNativeWhat_FixedTmp:
4637 case kIemNativeWhat_pVCpuFixed:
4638 case kIemNativeWhat_pCtxFixed:
4639 case kIemNativeWhat_PcShadow:
4640 case kIemNativeWhat_FixedReserved:
4641 case kIemNativeWhat_Invalid:
4642 case kIemNativeWhat_End:
4643 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4644 }
4645 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4646 }
4647 }
4648
4649 /*
4650 * Do the actual freeing.
4651 */
4652 if (pReNative->Core.bmHstRegs & fRegsToFree)
4653 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4654 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4655 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4656
4657 /* If there are guest register shadows in any call-volatile register, we
4658 have to clear the corresponding guest register masks for each register. */
4659 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4660 if (fHstRegsWithGstShadow)
4661 {
4662 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4663 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4664 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4665 do
4666 {
4667 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4668 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4669
4670 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4671 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4672 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4673 } while (fHstRegsWithGstShadow != 0);
4674 }
4675
4676 return off;
4677}
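
/*
 * Illustrative sketch (not built): the rough shape of preparing for and
 * cleaning up after a helper call using the function above. The example
 * function name, the argument count and the exact ordering are assumptions
 * for illustration; only the helpers named are taken from this file.
 */
#if 0
static uint32_t iemNativeExampleEmitHelperCallPrep(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Free/flush everything living in call-volatile registers, keeping no variables. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/, 0 /*fKeepVars*/);
    /* ... load the two argument registers and emit the actual call here ... */
    /* The helper may have modified any guest register, so drop all shadow copies. */
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
    return off;
}
#endif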
4678
4679
4680/**
4681 * Flushes a set of guest register shadow copies.
4682 *
4683 * This is usually done after calling a threaded function or a C-implementation
4684 * of an instruction.
4685 *
4686 * @param pReNative The native recompile state.
4687 * @param fGstRegs Set of guest registers to flush.
4688 */
4689DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4690{
4691 /*
4692 * Reduce the mask by what's currently shadowed
4693 */
4694 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4695 fGstRegs &= bmGstRegShadowsOld;
4696 if (fGstRegs)
4697 {
4698 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4699 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4700 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4701 if (bmGstRegShadowsNew)
4702 {
4703 /*
4704 * Partial.
4705 */
4706 do
4707 {
4708 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4709 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4710 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4711 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4712 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4713
4714 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4715 fGstRegs &= ~fInThisHstReg;
4716 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4717 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4718 if (!fGstRegShadowsNew)
4719 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4720 } while (fGstRegs != 0);
4721 }
4722 else
4723 {
4724 /*
4725 * Clear all.
4726 */
4727 do
4728 {
4729 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4730 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4731 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4732 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4733 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4734
4735 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4736 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4737 } while (fGstRegs != 0);
4738 pReNative->Core.bmHstRegsWithGstShadow = 0;
4739 }
4740 }
4741}
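
/*
 * Illustrative sketch (not built): dropping the shadow copy of a single guest
 * register, here the PC, after emitting something that invalidates it. The
 * scenario and example function name are hypothetical; the helper and the
 * RT_BIT_64() mask construction are the ones used throughout this file.
 */
#if 0
static void iemNativeExampleForgetPcShadow(PIEMRECOMPILERSTATE pReNative)
{
    /* Drop only the PC shadow; OR several RT_BIT_64() values together to drop more at once. */
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
}
#endif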
4742
4743
4744/**
4745 * Flushes guest register shadow copies held by a set of host registers.
4746 *
4747 * This is used with the TLB lookup code for ensuring that we don't carry on
4748 * with any guest shadows in volatile registers, as these will get corrupted by
4749 * a TLB miss.
4750 *
4751 * @param pReNative The native recompile state.
4752 * @param fHstRegs Set of host registers to flush guest shadows for.
4753 */
4754DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4755{
4756 /*
4757 * Reduce the mask by what's currently shadowed.
4758 */
4759 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4760 fHstRegs &= bmHstRegsWithGstShadowOld;
4761 if (fHstRegs)
4762 {
4763 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4764 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4765 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4766 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4767 if (bmHstRegsWithGstShadowNew)
4768 {
4769 /*
4770 * Partial (likely).
4771 */
4772 uint64_t fGstShadows = 0;
4773 do
4774 {
4775 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4776 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4777 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4778 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4779
4780 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4781 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4782 fHstRegs &= ~RT_BIT_32(idxHstReg);
4783 } while (fHstRegs != 0);
4784 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4785 }
4786 else
4787 {
4788 /*
4789 * Clear all.
4790 */
4791 do
4792 {
4793 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4794 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4795 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4796 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4797
4798 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4799 fHstRegs &= ~RT_BIT_32(idxHstReg);
4800 } while (fHstRegs != 0);
4801 pReNative->Core.bmGstRegShadows = 0;
4802 }
4803 }
4804}
4805
4806
4807/**
4808 * Restores guest shadow copies in volatile registers.
4809 *
4810 * This is used after calling a helper function (think TLB miss) to restore the
4811 * register state of volatile registers.
4812 *
4813 * @param pReNative The native recompile state.
4814 * @param off The code buffer offset.
4815 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4816 * be active (allocated) w/o asserting. Hack.
4817 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4818 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4819 */
4820DECL_HIDDEN_THROW(uint32_t)
4821iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4822{
4823 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4824 if (fHstRegs)
4825 {
4826 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4827 do
4828 {
4829 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4830
4831 /* It's not fatal if a register is active holding a variable that
4832 shadows a guest register, ASSUMING all pending guest register
4833 writes were flushed prior to the helper call. However, we'll be
4834 emitting duplicate restores, so it wastes code space.
4835 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4836 RT_NOREF(fHstRegsActiveShadows);
4837
4838 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4839 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4840 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4841 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4842
4843 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4844 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4845
4846 fHstRegs &= ~RT_BIT_32(idxHstReg);
4847 } while (fHstRegs != 0);
4848 }
4849 return off;
4850}
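
/*
 * Illustrative sketch (not built): the tail end of a TLB-miss style helper
 * call, which is what the function above is meant for. The example function
 * name and surrounding steps are assumptions; only the restore helper is real.
 */
#if 0
static uint32_t iemNativeExampleTlbMissHelperTail(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* ... the helper call was emitted above; pending guest register writes were
           flushed before it, so reloading from CPUMCTX is safe ... */
    /* Reload the guest values the call-volatile registers were shadowing; the
       shadow assignments themselves were kept across the call. */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
    return off;
}
#endif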
4851
4852
4853
4854
4855/*********************************************************************************************************************************
4856* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4857*********************************************************************************************************************************/
4858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4859
4860/**
4861 * Info about shadowed guest SIMD register values.
4862 * @see IEMNATIVEGSTSIMDREG
4863 */
4864static struct
4865{
4866 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4867 uint32_t offXmm;
4868 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4869 uint32_t offYmm;
4870 /** Name (for logging). */
4871 const char *pszName;
4872} const g_aGstSimdShadowInfo[] =
4873{
4874#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4875 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4876 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4877 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4878 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4879 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4880 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4881 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4882 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4883 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4884 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4885 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4886 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4887 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4888 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4889 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4890 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4891 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4892#undef CPUMCTX_OFF_AND_SIZE
4893};
4894AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4895
4896
4897#ifdef LOG_ENABLED
4898/** Host CPU SIMD register names. */
4899DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4900{
4901#ifdef RT_ARCH_AMD64
4902 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4903 #elif defined(RT_ARCH_ARM64)
4904 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4905 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4906#else
4907# error "port me"
4908#endif
4909};
4910#endif
4911
4912
4913/**
4914 * Frees a temporary SIMD register.
4915 *
4916 * Any shadow copies of guest registers assigned to the host register will not
4917 * be flushed by this operation.
4918 */
4919DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4920{
4921 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4922 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4923 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4924 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4925 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4926}
4927
4928
4929/**
4930 * Emits code to flush a pending write of the given guest SIMD register, if any.
4931 *
4932 * @returns New code buffer offset.
4933 * @param pReNative The native recompile state.
4934 * @param off Current code buffer position.
4935 * @param enmGstSimdReg The guest SIMD register to flush.
4936 */
4937DECL_HIDDEN_THROW(uint32_t)
4938iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4939{
4940 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4941
4942 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4943 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4944 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4945 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4946
4947 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4948 {
4949 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4950 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4951 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4952 }
4953
4954 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4955 {
4956 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4957 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4958 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4959 }
4960
4961 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4962 return off;
4963}
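
/*
 * Illustrative sketch (not built): writing back a dirty guest SIMD register
 * before emitting code that accesses the CPUMCTX copy directly. The example
 * function name and the choice of ymm0 are assumptions; the helper and the
 * dirty-check macro are the ones used above.
 */
#if 0
static uint32_t iemNativeExampleFlushDirtyYmm0(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxGstSimdReg = 0; /* xmm0/ymm0, an arbitrary pick for the example */
    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
        off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
    return off;
}
#endif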
4964
4965
4966/**
4967 * Locate a register, possibly freeing one up.
4968 *
4969 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4970 * failed.
4971 *
4972 * @returns Host register number on success. Returns UINT8_MAX if no registers
4973 * found, the caller is supposed to deal with this and raise an
4974 * allocation type specific status code (if desired).
4975 *
4976 * @throws VBox status code if we run into trouble spilling a variable or
4977 * recording debug info. Does NOT throw anything if we're out of
4978 * registers, though.
4979 */
4980static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4981 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4982{
4983 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4984 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4985 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4986
4987 /*
4988 * Try a freed register that's shadowing a guest register.
4989 */
4990 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4991 if (fRegs)
4992 {
4993 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4994
4995#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4996 /*
4997 * When we have liveness information, we use it to kick out all shadowed
4998 * guest registers that will not be needed any more in this TB. If we're
4999 * lucky, this may prevent us from ending up here again.
5000 *
5001 * Note! We must consider the previous entry here so we don't free
5002 * anything that the current threaded function requires (current
5003 * entry is produced by the next threaded function).
5004 */
5005 uint32_t const idxCurCall = pReNative->idxCurCall;
5006 if (idxCurCall > 0)
5007 {
5008 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5009
5010# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5011 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5012 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5013 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
5014#else
5015 /* Construct a mask of the registers not in the read or write state.
5016 Note! We could skip writes, if they aren't from us, as this is just
5017 a hack to prevent trashing registers that have just been written
5018 or will be written when we retire the current instruction. */
5019 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5020 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5021 & IEMLIVENESSBIT_MASK;
5022#endif
5023 /* If it matches any shadowed registers. */
5024 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5025 {
5026 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5027 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5028 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5029
5030 /* See if we've got any unshadowed registers we can return now. */
5031 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5032 if (fUnshadowedRegs)
5033 {
5034 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5035 return (fPreferVolatile
5036 ? ASMBitFirstSetU32(fUnshadowedRegs)
5037 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5038 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5039 - 1;
5040 }
5041 }
5042 }
5043#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5044
5045 unsigned const idxReg = (fPreferVolatile
5046 ? ASMBitFirstSetU32(fRegs)
5047 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5048 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5049 - 1;
5050
5051 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5052 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5053 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5054 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5055 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5056
5057 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5058 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5059 uint32_t idxGstSimdReg = 0;
5060 do
5061 {
5062 if (fGstRegShadows & 0x1)
5063 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5064 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5065 idxGstSimdReg++;
5066 fGstRegShadows >>= 1;
5067 } while (fGstRegShadows);
5068
5069 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5070 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5071 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5072 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5073 return idxReg;
5074 }
5075
5076 /*
5077 * Try free up a variable that's in a register.
5078 *
5079 * We do two rounds here, first evacuating variables we don't need to be
5080 * saved on the stack, then in the second round move things to the stack.
5081 */
5082 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5083 AssertReleaseFailed(); /** @todo No variable support right now. */
5084#if 0
5085 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5086 {
5087 uint32_t fVars = pReNative->Core.bmSimdVars;
5088 while (fVars)
5089 {
5090 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5091 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5092 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5093 && (RT_BIT_32(idxReg) & fRegMask)
5094 && ( iLoop == 0
5095 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5096 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5097 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5098 {
5099 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5100 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5101 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5102 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5103 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5104 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5105
5106 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5107 {
5108 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5109 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5110 }
5111
5112 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5113 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5114
5115 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5116 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5117 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5118 return idxReg;
5119 }
5120 fVars &= ~RT_BIT_32(idxVar);
5121 }
5122 }
5123#endif
5124
5125 AssertFailed();
5126 return UINT8_MAX;
5127}
5128
5129
5130/**
5131 * Flushes a set of guest register shadow copies.
5132 *
5133 * This is usually done after calling a threaded function or a C-implementation
5134 * of an instruction.
5135 *
5136 * @param pReNative The native recompile state.
5137 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5138 */
5139DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5140{
5141 /*
5142 * Reduce the mask by what's currently shadowed
5143 */
5144 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5145 fGstSimdRegs &= bmGstSimdRegShadows;
5146 if (fGstSimdRegs)
5147 {
5148 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5149 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5150 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5151 if (bmGstSimdRegShadowsNew)
5152 {
5153 /*
5154 * Partial.
5155 */
5156 do
5157 {
5158 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5159 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5160 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5161 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5162 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5163 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5164
5165 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5166 fGstSimdRegs &= ~fInThisHstReg;
5167 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5168 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5169 if (!fGstRegShadowsNew)
5170 {
5171 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5172 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5173 }
5174 } while (fGstSimdRegs != 0);
5175 }
5176 else
5177 {
5178 /*
5179 * Clear all.
5180 */
5181 do
5182 {
5183 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5184 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5185 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5186 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5187 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5188 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5189
5190 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5191 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5192 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5193 } while (fGstSimdRegs != 0);
5194 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5195 }
5196 }
5197}
5198
5199
5200/**
5201 * Allocates a temporary host SIMD register.
5202 *
5203 * This may emit code to save register content onto the stack in order to free
5204 * up a register.
5205 *
5206 * @returns The host register number; throws VBox status code on failure,
5207 * so no need to check the return value.
5208 * @param pReNative The native recompile state.
5209 * @param poff Pointer to the variable with the code buffer position.
5210 * This will be updated if we need to move a variable from
5211 * register to stack in order to satisfy the request.
5212 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5213 * registers (@c true, default) or the other way around
5214 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5215 */
5216DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5217{
5218 /*
5219 * Try find a completely unused register, preferably a call-volatile one.
5220 */
5221 uint8_t idxSimdReg;
5222 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5223 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5224 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5225 if (fRegs)
5226 {
5227 if (fPreferVolatile)
5228 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5229 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5230 else
5231 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5232 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5233 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5234 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5235 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5236 }
5237 else
5238 {
5239 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5240 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5241 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5242 }
5243
5244 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5245 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5246}
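
/*
 * Illustrative sketch (not built): the usual pairing of a temporary SIMD
 * register allocation with its release. The example function name and the
 * body in between are hypothetical; the two helpers are defined in this file.
 */
#if 0
static uint32_t iemNativeExampleScratchSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    /* ... emit SIMD code clobbering idxTmpSimdReg here ... */
    iemNativeSimdRegFreeTmp(pReNative, idxTmpSimdReg);
    return off;
}
#endif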
5247
5248
5249/**
5250 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5251 * registers.
5252 *
5253 * @returns The host register number; throws VBox status code on failure,
5254 * so no need to check the return value.
5255 * @param pReNative The native recompile state.
5256 * @param poff Pointer to the variable with the code buffer position.
5257 * This will be updated if we need to move a variable from
5258 * register to stack in order to satisfy the request.
5259 * @param fRegMask Mask of acceptable registers.
5260 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5261 * registers (@c true, default) or the other way around
5262 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5263 */
5264DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5265 bool fPreferVolatile /*= true*/)
5266{
5267 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5268 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5269
5270 /*
5271 * Try find a completely unused register, preferably a call-volatile one.
5272 */
5273 uint8_t idxSimdReg;
5274 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5275 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5276 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5277 & fRegMask;
5278 if (fRegs)
5279 {
5280 if (fPreferVolatile)
5281 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5282 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5283 else
5284 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5285 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5286 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5287 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5288 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5289 }
5290 else
5291 {
5292 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5293 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5294 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5295 }
5296
5297 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5298 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5299}
5300
5301
5302/**
5303 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5304 *
5305 * @param pReNative The native recompile state.
5306 * @param idxHstSimdReg The host SIMD register to update the state for.
5307 * @param enmLoadSz The load size to set.
5308 */
5309DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5310 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5311{
5312 /* Everything valid already? -> nothing to do. */
5313 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5314 return;
5315
5316 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5317 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5318 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5319 {
5320 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5321 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5322 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5323 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5324 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5325 }
5326}
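
/*
 * Illustrative sketch (not built): two partial loads of the same host SIMD
 * register merge into the full 256-bit state, per the inline helper above.
 * The example function name is hypothetical; the enum values are real.
 */
#if 0
static void iemNativeExampleMergeLoadFlags(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg)
{
    /* Starting from kIemNativeGstSimdRegLdStSz_Invalid, the two halves combine. */
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
    /* pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded is now kIemNativeGstSimdRegLdStSz_256. */
}
#endif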
5327
5328
5329static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5330 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5331{
5332 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5333 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5334 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5335 {
5336# ifdef RT_ARCH_ARM64
5337 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5338 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5339# endif
5340
5341 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5342 {
5343 switch (enmLoadSzDst)
5344 {
5345 case kIemNativeGstSimdRegLdStSz_256:
5346 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5347 break;
5348 case kIemNativeGstSimdRegLdStSz_Low128:
5349 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5350 break;
5351 case kIemNativeGstSimdRegLdStSz_High128:
5352 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5353 break;
5354 default:
5355 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5356 }
5357
5358 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5359 }
5360 }
5361 else
5362 {
5363 /* Complicated stuff where the source is currently missing something, later. */
5364 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5365 }
5366
5367 return off;
5368}
5369
5370
5371/**
5372 * Allocates a temporary host SIMD register for keeping a guest
5373 * SIMD register value.
5374 *
5375 * Since we may already have a register holding the guest register value,
5376 * code will be emitted to do the loading if that's not the case. Code may also
5377 * be emitted if we have to free up a register to satisfy the request.
5378 *
5379 * @returns The host register number; throws VBox status code on failure, so no
5380 * need to check the return value.
5381 * @param pReNative The native recompile state.
5382 * @param poff Pointer to the variable with the code buffer
5383 * position. This will be updated if we need to move a
5384 * variable from register to stack in order to satisfy
5385 * the request.
5386 * @param enmGstSimdReg The guest SIMD register that is to be accessed.
5387 * @param enmIntendedUse How the caller will be using the host register.
5388 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5389 * register is okay (default). The ASSUMPTION here is
5390 * that the caller has already flushed all volatile
5391 * registers, so this is only applied if we allocate a
5392 * new register.
5393 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5394 */
5395DECL_HIDDEN_THROW(uint8_t)
5396iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5397 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5398 bool fNoVolatileRegs /*= false*/)
5399{
5400 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5401#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5402 AssertMsg( pReNative->idxCurCall == 0
5403 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5404 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5405 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5406 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5407 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5408 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5409#endif
5410#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5411 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5412#endif
5413 uint32_t const fRegMask = !fNoVolatileRegs
5414 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5415 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5416
5417 /*
5418 * First check if the guest register value is already in a host register.
5419 */
5420 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5421 {
5422 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5423 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5424 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5425 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5426
5427 /* It's not supposed to be allocated... */
5428 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5429 {
5430 /*
5431 * If the register will trash the guest shadow copy, try to find a
5432 * completely unused register we can use instead. If that fails,
5433 * we need to disassociate the host reg from the guest reg.
5434 */
5435 /** @todo would be nice to know if preserving the register is in any way helpful. */
5436 /* If the purpose is calculations, try to duplicate the register value as
5437 we'll be clobbering the shadow. */
5438 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5439 && ( ~pReNative->Core.bmHstSimdRegs
5440 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5441 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5442 {
5443 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5444
5445 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5446
5447 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5448 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5449 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5450 idxSimdReg = idxRegNew;
5451 }
5452 /* If the current register matches the restrictions, go ahead and allocate
5453 it for the caller. */
5454 else if (fRegMask & RT_BIT_32(idxSimdReg))
5455 {
5456 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5457 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5458 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5459 {
5460 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5461 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5462 else
5463 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5464 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5465 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5466 }
5467 else
5468 {
5469 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5470 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5471 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5472 }
5473 }
5474 /* Otherwise, allocate a register that satisfies the caller and transfer
5475 the shadowing if compatible with the intended use. (This basically
5476 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5477 else
5478 {
5479 Assert(fNoVolatileRegs);
5480 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5481 !fNoVolatileRegs
5482 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5483 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5484 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5485 {
5486 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5487 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5488 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5489 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5490 }
5491 else
5492 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5493 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5494 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5495 idxSimdReg = idxRegNew;
5496 }
5497 }
5498 else
5499 {
5500 /*
5501 * Oops. Shadowed guest register already allocated!
5502 *
5503 * Allocate a new register, copy the value and, if updating, the
5504 * guest shadow copy assignment to the new register.
5505 */
5506 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5507 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5508 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5509 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5510
5511 /** @todo share register for readonly access. */
5512 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5513 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5514
5515 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5516 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5517 else
5518 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5519
5520 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5521 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5522 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5523 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5524 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5525 else
5526 {
5527 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5528 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5529 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5530 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5531 }
5532 idxSimdReg = idxRegNew;
5533 }
5534 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5535
5536#ifdef VBOX_STRICT
5537 /* Strict builds: Check that the value is correct. */
5538 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5539 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5540#endif
5541
5542 return idxSimdReg;
5543 }
5544
5545 /*
5546 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5547 */
5548 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5549
5550 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5551 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5552 else
5553 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5554
5555 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5556 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5557
5558 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5559 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5560
5561 return idxRegNew;
5562}
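
/*
 * Illustrative sketch (not built): a read-modify-write use of the allocator
 * above on the low 128 bits of a guest SIMD register. The example function
 * name, the choice of xmm1 and the elided emitter in the middle are
 * assumptions; the allocation and free calls follow this file's patterns.
 */
#if 0
static uint32_t iemNativeExampleUpdateXmm1(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Get xmm1 into a host register with its low 128 bits valid, for update use. */
    uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                       kIemNativeGstSimdRegLdStSz_Low128,
                                                                       kIemNativeGstRegUse_ForUpdate);
    /* ... emit code modifying the low 128 bits of idxSimdReg here; a real caller
           would also mark the guest register dirty so the change gets written back ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
    return off;
}
#endif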
5563
5564#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5565
5566
5567
5568/*********************************************************************************************************************************
5569* Code emitters for flushing pending guest register writes and sanity checks *
5570*********************************************************************************************************************************/
5571
5572#ifdef VBOX_STRICT
5573/**
5574 * Does internal register allocator sanity checks.
5575 */
5576DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5577{
5578 /*
5579 * Iterate host registers building a guest shadowing set.
5580 */
5581 uint64_t bmGstRegShadows = 0;
5582 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5583 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5584 while (bmHstRegsWithGstShadow)
5585 {
5586 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5587 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5588 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5589
5590 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5591 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5592 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5593 bmGstRegShadows |= fThisGstRegShadows;
5594 while (fThisGstRegShadows)
5595 {
5596 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5597 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5598 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5599 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5600 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5601 }
5602 }
5603 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5604 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5605 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5606
5607 /*
5608 * Now the other way around, checking the guest to host index array.
5609 */
5610 bmHstRegsWithGstShadow = 0;
5611 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5612 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5613 while (bmGstRegShadows)
5614 {
5615 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5616 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5617 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5618
5619 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5620 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5621 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5622 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5623 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5624 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5625 }
5626 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5627 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5628 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5629}
5630#endif /* VBOX_STRICT */
5631
5632
5633/**
5634 * Flushes any delayed guest register writes.
5635 *
5636 * This must be called prior to calling CImpl functions and any helpers that use
5637 * the guest state (like raising exceptions) and such.
5638 *
5639 * Currently this is limited to delayed RIP updates (when IEMNATIVE_WITH_DELAYED_PC_UPDATING
5640 * is defined) and to dirty guest SIMD register shadows.
5641 */
5642DECL_HIDDEN_THROW(uint32_t)
5643iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5644{
5645#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5646 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5647 off = iemNativeEmitPcWriteback(pReNative, off);
5648#else
5649 RT_NOREF(pReNative, fGstShwExcept);
5650#endif
5651
5652#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5653 /** @todo r=bird: There must be a quicker way to check if anything needs
5654 * doing and then call the SIMD function to do the flushing. */
5655 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
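/* Write back any dirty guest SIMD register shadows and, when fFlushShadows is
   set, also drop the shadowing itself. */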
5656 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5657 {
5658 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5659 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5660
5661 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5662 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5663
5664 if ( fFlushShadows
5665 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5666 {
5667 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5668
5669 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5670 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5671 }
5672 }
5673#else
5674 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5675#endif
5676
5677 return off;
5678}
5679
5680
5681#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5682/**
5683 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5684 */
5685DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5686{
5687 Assert(pReNative->Core.offPc);
5688# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5689 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5690 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5691# endif
5692
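/* Conceptually this emits: pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc,
   using a temporary host register that shadows the guest PC. */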
5693# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5694 /* Allocate a temporary PC register. */
5695 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5696
5697 /* Perform the addition and store the result. */
5698 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5699 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5700
5701 /* Free but don't flush the PC register. */
5702 iemNativeRegFreeTmp(pReNative, idxPcReg);
5703# else
5704 /* Compare the shadow with the context value, they should match. */
5705 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5706 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5707# endif
5708
5709 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5710 pReNative->Core.offPc = 0;
5711 pReNative->Core.cInstrPcUpdateSkipped = 0;
5712
5713 return off;
5714}
5715#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5716
5717
5718/*********************************************************************************************************************************
5719* Code Emitters (larger snippets) *
5720*********************************************************************************************************************************/
5721
5722/**
5723 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5724 * extending to 64-bit width.
5725 *
5726 * @returns New code buffer offset on success, UINT32_MAX on failure.
5727 * @param pReNative The recompiler state.
5728 * @param off The current code buffer position.
5729 * @param idxHstReg The host register to load the guest register value into.
5730 * @param enmGstReg The guest register to load.
5731 *
5732 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5733 * that is something the caller needs to do if applicable.
5734 */
5735DECL_HIDDEN_THROW(uint32_t)
5736iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5737{
5738 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5739 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5740
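/* This boils down to a single load from pVCpu at g_aGstShadowInfo[enmGstReg].off,
   sized according to the table entry. */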
5741 switch (g_aGstShadowInfo[enmGstReg].cb)
5742 {
5743 case sizeof(uint64_t):
5744 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5745 case sizeof(uint32_t):
5746 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5747 case sizeof(uint16_t):
5748 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5749#if 0 /* not present in the table. */
5750 case sizeof(uint8_t):
5751 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5752#endif
5753 default:
5754 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5755 }
5756}
5757
5758
5759#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5760/**
5761 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5762 *
5763 * @returns New code buffer offset on success, UINT32_MAX on failure.
5764 * @param pReNative The recompiler state.
5765 * @param off The current code buffer position.
5766 * @param idxHstSimdReg The host register to load the guest register value into.
5767 * @param enmGstSimdReg The guest register to load.
5768 * @param enmLoadSz The load size of the register.
5769 *
5770 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5771 * that is something the caller needs to do if applicable.
5772 */
5773DECL_HIDDEN_THROW(uint32_t)
5774iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5775 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5776{
5777 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5778
5779 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5780 switch (enmLoadSz)
5781 {
5782 case kIemNativeGstSimdRegLdStSz_256:
5783 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5784 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5785 case kIemNativeGstSimdRegLdStSz_Low128:
5786 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5787 case kIemNativeGstSimdRegLdStSz_High128:
5788 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5789 default:
5790 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5791 }
5792}
5793#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5794
5795#ifdef VBOX_STRICT
5796
5797/**
5798 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5799 *
5800 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5801 * Trashes EFLAGS on AMD64.
5802 */
5803DECL_HIDDEN_THROW(uint32_t)
5804iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5805{
5806# ifdef RT_ARCH_AMD64
5807 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5808
5809 /* rol reg64, 32 */
5810 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5811 pbCodeBuf[off++] = 0xc1;
5812 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5813 pbCodeBuf[off++] = 32;
5814
5815 /* test reg32, ffffffffh */
5816 if (idxReg >= 8)
5817 pbCodeBuf[off++] = X86_OP_REX_B;
5818 pbCodeBuf[off++] = 0xf7;
5819 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5820 pbCodeBuf[off++] = 0xff;
5821 pbCodeBuf[off++] = 0xff;
5822 pbCodeBuf[off++] = 0xff;
5823 pbCodeBuf[off++] = 0xff;
5824
5825 /* je/jz +1 */
5826 pbCodeBuf[off++] = 0x74;
5827 pbCodeBuf[off++] = 0x01;
5828
5829 /* int3 */
5830 pbCodeBuf[off++] = 0xcc;
5831
5832 /* rol reg64, 32 */
5833 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5834 pbCodeBuf[off++] = 0xc1;
5835 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5836 pbCodeBuf[off++] = 32;
5837
5838# elif defined(RT_ARCH_ARM64)
5839 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5840 /* lsr tmp0, reg64, #32 */
5841 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5842 /* cbz tmp0, +1 */
5843 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5844 /* brk #0x1100 */
5845 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5846
5847# else
5848# error "Port me!"
5849# endif
5850 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5851 return off;
5852}
5853
5854
5855/**
5856 * Emits code that checks that the content of register @a idxReg is the same
5857 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5858 * instruction if that's not the case.
5859 *
5860 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5861 * Trashes EFLAGS on AMD64.
5862 */
5863DECL_HIDDEN_THROW(uint32_t)
5864iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5865{
5866# ifdef RT_ARCH_AMD64
5867 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5868
5869 /* cmp reg, [mem] */
5870 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5871 {
5872 if (idxReg >= 8)
5873 pbCodeBuf[off++] = X86_OP_REX_R;
5874 pbCodeBuf[off++] = 0x38;
5875 }
5876 else
5877 {
5878 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5879 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5880 else
5881 {
5882 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5883 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5884 else
5885 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5886 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5887 if (idxReg >= 8)
5888 pbCodeBuf[off++] = X86_OP_REX_R;
5889 }
5890 pbCodeBuf[off++] = 0x39;
5891 }
5892 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5893
5894 /* je/jz +1 */
5895 pbCodeBuf[off++] = 0x74;
5896 pbCodeBuf[off++] = 0x01;
5897
5898 /* int3 */
5899 pbCodeBuf[off++] = 0xcc;
5900
5901 /* For values smaller than the register size, we must check that the rest
5902 of the register is all zeros. */
5903 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5904 {
5905 /* test reg64, imm32 */
5906 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5907 pbCodeBuf[off++] = 0xf7;
5908 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5909 pbCodeBuf[off++] = 0;
5910 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5911 pbCodeBuf[off++] = 0xff;
5912 pbCodeBuf[off++] = 0xff;
5913
5914 /* je/jz +1 */
5915 pbCodeBuf[off++] = 0x74;
5916 pbCodeBuf[off++] = 0x01;
5917
5918 /* int3 */
5919 pbCodeBuf[off++] = 0xcc;
5920 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5921 }
5922 else
5923 {
5924 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5925 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5926 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5927 }
5928
5929# elif defined(RT_ARCH_ARM64)
5930 /* mov TMP0, [gstreg] */
5931 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5932
5933 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5934 /* sub tmp0, tmp0, idxReg */
5935 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5936 /* cbz tmp0, +1 */
5937 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5938 /* brk #0x1000+enmGstReg */
5939 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5940 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5941
5942# else
5943# error "Port me!"
5944# endif
5945 return off;
5946}
5947
5948
5949# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5950/**
5951 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
5952 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5953 * instruction if that's not the case.
5954 *
5955 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5956 * Trashes EFLAGS on AMD64.
5957 */
5958DECL_HIDDEN_THROW(uint32_t)
5959iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5960 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5961{
5962 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5963 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5964 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5965 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5966 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5967 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5968 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5969 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5970 return off;
5971
5972# ifdef RT_ARCH_AMD64
5973 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
5974
5975 /* movdqa vectmp0, idxSimdReg */
5976 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5977
5978 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5979
5980 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5981 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5982 if (idxSimdReg >= 8)
5983 pbCodeBuf[off++] = X86_OP_REX_R;
5984 pbCodeBuf[off++] = 0x0f;
5985 pbCodeBuf[off++] = 0x38;
5986 pbCodeBuf[off++] = 0x29;
5987 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5988
5989 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5990 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5991 pbCodeBuf[off++] = X86_OP_REX_W
5992 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
5993 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5994 pbCodeBuf[off++] = 0x0f;
5995 pbCodeBuf[off++] = 0x3a;
5996 pbCodeBuf[off++] = 0x16;
5997 pbCodeBuf[off++] = 0xeb;
5998 pbCodeBuf[off++] = 0x00;
5999
6000 /* test tmp0, 0xffffffff. */
6001 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6002 pbCodeBuf[off++] = 0xf7;
6003 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6004 pbCodeBuf[off++] = 0xff;
6005 pbCodeBuf[off++] = 0xff;
6006 pbCodeBuf[off++] = 0xff;
6007 pbCodeBuf[off++] = 0xff;
6008
6009 /* je/jz +1 */
6010 pbCodeBuf[off++] = 0x74;
6011 pbCodeBuf[off++] = 0x01;
6012
6013 /* int3 */
6014 pbCodeBuf[off++] = 0xcc;
6015
6016 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6017 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6018 pbCodeBuf[off++] = X86_OP_REX_W
6019 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6020 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6021 pbCodeBuf[off++] = 0x0f;
6022 pbCodeBuf[off++] = 0x3a;
6023 pbCodeBuf[off++] = 0x16;
6024 pbCodeBuf[off++] = 0xeb;
6025 pbCodeBuf[off++] = 0x01;
6026
6027 /* test tmp0, 0xffffffff. */
6028 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6029 pbCodeBuf[off++] = 0xf7;
6030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6031 pbCodeBuf[off++] = 0xff;
6032 pbCodeBuf[off++] = 0xff;
6033 pbCodeBuf[off++] = 0xff;
6034 pbCodeBuf[off++] = 0xff;
6035
6036 /* je/jz +1 */
6037 pbCodeBuf[off++] = 0x74;
6038 pbCodeBuf[off++] = 0x01;
6039
6040 /* int3 */
6041 pbCodeBuf[off++] = 0xcc;
6042
6043# elif defined(RT_ARCH_ARM64)
6044 /* mov vectmp0, [gstreg] */
6045 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6046
6047 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6048 {
6049 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6050 /* eor vectmp0, vectmp0, idxSimdReg */
6051 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6052 /* cnt vectmp0, vectmp0, #0*/
6053 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6054 /* umov tmp0, vectmp0.D[0] */
6055 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6056 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6057 /* cbz tmp0, +1 */
6058 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6059 /* brk #0x1000+enmGstReg */
6060 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6061 }
6062
6063 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6064 {
6065 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6066 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6067 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6068 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6069 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6070 /* umov tmp0, (vectmp0 + 1).D[0] */
6071 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6072 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6073 /* cbz tmp0, +1 */
6074 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6075 /* brk #0x1000+enmGstReg */
6076 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6077 }
6078
6079# else
6080# error "Port me!"
6081# endif
6082
6083 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6084 return off;
6085}
6086# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6087
6088
6089/**
6090 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
6091 * important bits.
6092 *
6093 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6094 * Trashes EFLAGS on AMD64.
6095 */
6096DECL_HIDDEN_THROW(uint32_t)
6097iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6098{
6099 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6100 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6101 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6102 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6103
6104 # ifdef RT_ARCH_AMD64
6105 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6106
6107 /* je/jz +1 */
6108 pbCodeBuf[off++] = 0x74;
6109 pbCodeBuf[off++] = 0x01;
6110
6111 /* int3 */
6112 pbCodeBuf[off++] = 0xcc;
6113
6114# elif defined(RT_ARCH_ARM64)
6115 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6116
6117 /* b.eq +1 */
6118 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6119 /* brk #0x2000 */
6120 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6121
6122# else
6123# error "Port me!"
6124# endif
6125 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6126
6127 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6128 return off;
6129}
6130
6131#endif /* VBOX_STRICT */
6132
6133/**
6134 * Emits code for checking the return code of a call and rcPassUp, returning
6135 * from the code if either is non-zero.
6136 */
6137DECL_HIDDEN_THROW(uint32_t)
6138iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6139{
6140#ifdef RT_ARCH_AMD64
6141 /*
6142 * AMD64: eax = call status code.
6143 */
6144
6145 /* edx = rcPassUp */
6146 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6147# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6148 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6149# endif
6150
6151 /* edx = eax | rcPassUp */
6152 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6153 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6154 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6155 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6156
6157 /* Jump to non-zero status return path. */
6158 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6159
6160 /* done. */
6161
6162#elif RT_ARCH_ARM64
6163 /*
6164 * ARM64: w0 = call status code.
6165 */
6166# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6167 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6168# endif
6169 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6170
6171 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6172
6173 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6174
6175 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6176 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6177 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6178
6179#else
6180# error "port me"
6181#endif
6182 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6183 RT_NOREF_PV(idxInstr);
6184 return off;
6185}
6186
6187
6188/**
6189 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6190 * raising a \#GP(0) if it isn't.
6191 *
6192 * @returns New code buffer offset, UINT32_MAX on failure.
6193 * @param pReNative The native recompile state.
6194 * @param off The code buffer offset.
6195 * @param idxAddrReg The host register with the address to check.
6196 * @param idxInstr The current instruction.
6197 */
6198DECL_HIDDEN_THROW(uint32_t)
6199iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6200{
6201 /*
6202 * Make sure we don't have any outstanding guest register writes as we may
6203 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6204 */
6205 off = iemNativeRegFlushPendingWrites(pReNative, off);
6206
6207#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6208 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6209#else
6210 RT_NOREF(idxInstr);
6211#endif
6212
6213#ifdef RT_ARCH_AMD64
6214 /*
6215 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6216 * return raisexcpt();
6217 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6218 */
6219 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6220
6221 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6222 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6223 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6224 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6225 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6226
6227 iemNativeRegFreeTmp(pReNative, iTmpReg);
6228
6229#elif defined(RT_ARCH_ARM64)
6230 /*
6231 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6232 * return raisexcpt();
6233 * ----
6234 * mov x1, 0x800000000000
6235 * add x1, x0, x1
6236 * cmp xzr, x1, lsr 48
6237 * b.ne .Lraisexcpt
6238 */
6239 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6240
6241 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6242 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6243 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6244 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6245
6246 iemNativeRegFreeTmp(pReNative, iTmpReg);
6247
6248#else
6249# error "Port me"
6250#endif
6251 return off;
6252}
6253
6254
6255/**
6256 * Emits code to check that the content of @a idxAddrReg is within the limit
6257 * of CS, raising a \#GP(0) if it isn't.
6258 *
6259 * @returns New code buffer offset; throws VBox status code on error.
6260 * @param pReNative The native recompile state.
6261 * @param off The code buffer offset.
6262 * @param idxAddrReg The host register (32-bit) with the address to
6263 * check.
6264 * @param idxInstr The current instruction.
6265 */
6266DECL_HIDDEN_THROW(uint32_t)
6267iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6268 uint8_t idxAddrReg, uint8_t idxInstr)
6269{
6270 /*
6271 * Make sure we don't have any outstanding guest register writes as we may
6272 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6273 */
6274 off = iemNativeRegFlushPendingWrites(pReNative, off);
6275
6276#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6277 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6278#else
6279 RT_NOREF(idxInstr);
6280#endif
6281
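/* Conceptually: raise #GP(0) if the 32-bit address is above the CS segment
   limit - an unsigned compare followed by a 'ja' to the RaiseGp0 label. */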
6282 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6283 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6284 kIemNativeGstRegUse_ReadOnly);
6285
6286 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6287 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6288
6289 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6290 return off;
6291}
6292
6293
6294/**
6295 * Emits a call to a CImpl function or something similar.
6296 */
6297DECL_HIDDEN_THROW(uint32_t)
6298iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6299 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6300{
6301 /* Writeback everything. */
6302 off = iemNativeRegFlushPendingWrites(pReNative, off);
6303
6304 /*
6305 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6306 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6307 */
6308 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6309 fGstShwFlush
6310 | RT_BIT_64(kIemNativeGstReg_Pc)
6311 | RT_BIT_64(kIemNativeGstReg_EFlags));
6312 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6313
6314 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6315
6316 /*
6317 * Load the parameters.
6318 */
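/* The emitted call amounts to pfnCImpl(pVCpu, cbInstr[, uParam0[, uParam1[, uParam2]]]);
   on Windows/AMD64 with strict status codes the hidden VBOXSTRICTRC return buffer
   shifts all the arguments by one position. */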
6319#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6320 /* Special code for the hidden VBOXSTRICTRC pointer. */
6321 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6322 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6323 if (cAddParams > 0)
6324 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6325 if (cAddParams > 1)
6326 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6327 if (cAddParams > 2)
6328 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6329 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6330
6331#else
6332 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6333 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6334 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6335 if (cAddParams > 0)
6336 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6337 if (cAddParams > 1)
6338 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6339 if (cAddParams > 2)
6340# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6341 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6342# else
6343 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6344# endif
6345#endif
6346
6347 /*
6348 * Make the call.
6349 */
6350 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6351
6352#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6353 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6354#endif
6355
6356 /*
6357 * Check the status code.
6358 */
6359 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6360}
6361
6362
6363/**
6364 * Emits a call to a threaded worker function.
6365 */
6366DECL_HIDDEN_THROW(uint32_t)
6367iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6368{
6369 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6370 off = iemNativeRegFlushPendingWrites(pReNative, off);
6371
6372 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6373 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6374
6375#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6376 /* The threaded function may throw / long jmp, so set current instruction
6377 number if we're counting. */
6378 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6379#endif
6380
6381 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6382
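/* The emitted call amounts to:
   g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu, auParams[0], auParams[1], auParams[2]);
   only the first cParams entries of auParams are actually loaded. */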
6383#ifdef RT_ARCH_AMD64
6384 /* Load the parameters and emit the call. */
6385# ifdef RT_OS_WINDOWS
6386# ifndef VBOXSTRICTRC_STRICT_ENABLED
6387 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6388 if (cParams > 0)
6389 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6390 if (cParams > 1)
6391 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6392 if (cParams > 2)
6393 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6394# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6395 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6396 if (cParams > 0)
6397 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6398 if (cParams > 1)
6399 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6400 if (cParams > 2)
6401 {
6402 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6403 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6404 }
6405 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6406# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6407# else
6408 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6409 if (cParams > 0)
6410 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6411 if (cParams > 1)
6412 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6413 if (cParams > 2)
6414 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6415# endif
6416
6417 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6418
6419# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6420 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6421# endif
6422
6423#elif RT_ARCH_ARM64
6424 /*
6425 * ARM64:
6426 */
6427 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6428 if (cParams > 0)
6429 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6430 if (cParams > 1)
6431 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6432 if (cParams > 2)
6433 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6434
6435 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6436
6437#else
6438# error "port me"
6439#endif
6440
6441 /*
6442 * Check the status code.
6443 */
6444 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6445
6446 return off;
6447}
6448
6449#ifdef VBOX_WITH_STATISTICS
6450/**
6451 * Emits code to update the threaded call statistics.
6452 */
6453DECL_INLINE_THROW(uint32_t)
6454iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6455{
6456 /*
6457 * Update threaded function stats.
6458 */
6459 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6460 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6461# if defined(RT_ARCH_ARM64)
6462 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6463 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6464 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6465 iemNativeRegFreeTmp(pReNative, idxTmp1);
6466 iemNativeRegFreeTmp(pReNative, idxTmp2);
6467# else
6468 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6469# endif
6470 return off;
6471}
6472#endif /* VBOX_WITH_STATISTICS */
6473
6474
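/*
 * The helpers below all follow the same pattern: if the corresponding label was
 * requested while recompiling the TB, define it here, emit a call to the matching
 * iemNativeHlp* worker (or load the status code directly), and then jump back to
 * the common return sequence.
 */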
6475/**
6476 * Emits the code at the CheckBranchMiss label.
6477 */
6478static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6479{
6480 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6481 if (idxLabel != UINT32_MAX)
6482 {
6483 iemNativeLabelDefine(pReNative, idxLabel, off);
6484
6485 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6486 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6487 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6488
6489 /* jump back to the return sequence. */
6490 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6491 }
6492 return off;
6493}
6494
6495
6496/**
6497 * Emits the code at the NeedCsLimChecking label.
6498 */
6499static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6500{
6501 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6502 if (idxLabel != UINT32_MAX)
6503 {
6504 iemNativeLabelDefine(pReNative, idxLabel, off);
6505
6506 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6507 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6508 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6509
6510 /* jump back to the return sequence. */
6511 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6512 }
6513 return off;
6514}
6515
6516
6517/**
6518 * Emits the code at the ObsoleteTb label.
6519 */
6520static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6521{
6522 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6523 if (idxLabel != UINT32_MAX)
6524 {
6525 iemNativeLabelDefine(pReNative, idxLabel, off);
6526
6527 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6528 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6529 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6530
6531 /* jump back to the return sequence. */
6532 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6533 }
6534 return off;
6535}
6536
6537
6538/**
6539 * Emits the code at the RaiseGP0 label.
6540 */
6541static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6542{
6543 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6544 if (idxLabel != UINT32_MAX)
6545 {
6546 iemNativeLabelDefine(pReNative, idxLabel, off);
6547
6548 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6549 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6550 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6551
6552 /* jump back to the return sequence. */
6553 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6554 }
6555 return off;
6556}
6557
6558
6559/**
6560 * Emits the code at the RaiseNm label.
6561 */
6562static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6563{
6564 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
6565 if (idxLabel != UINT32_MAX)
6566 {
6567 iemNativeLabelDefine(pReNative, idxLabel, off);
6568
6569 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
6570 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6571 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
6572
6573 /* jump back to the return sequence. */
6574 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6575 }
6576 return off;
6577}
6578
6579
6580/**
6581 * Emits the code at the RaiseUd label.
6582 */
6583static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6584{
6585 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
6586 if (idxLabel != UINT32_MAX)
6587 {
6588 iemNativeLabelDefine(pReNative, idxLabel, off);
6589
6590 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
6591 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6592 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
6593
6594 /* jump back to the return sequence. */
6595 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6596 }
6597 return off;
6598}
6599
6600
6601/**
6602 * Emits the code at the RaiseMf label.
6603 */
6604static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6605{
6606 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
6607 if (idxLabel != UINT32_MAX)
6608 {
6609 iemNativeLabelDefine(pReNative, idxLabel, off);
6610
6611 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
6612 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6613 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
6614
6615 /* jump back to the return sequence. */
6616 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6617 }
6618 return off;
6619}
6620
6621
6622/**
6623 * Emits the code at the RaiseXf label.
6624 */
6625static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6626{
6627 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
6628 if (idxLabel != UINT32_MAX)
6629 {
6630 iemNativeLabelDefine(pReNative, idxLabel, off);
6631
6632 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
6633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6634 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
6635
6636 /* jump back to the return sequence. */
6637 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6638 }
6639 return off;
6640}
6641
6642
6643/**
6644 * Emits the code at the ReturnWithFlags label (returns
6645 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6646 */
6647static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6648{
6649 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6650 if (idxLabel != UINT32_MAX)
6651 {
6652 iemNativeLabelDefine(pReNative, idxLabel, off);
6653
6654 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6655
6656 /* jump back to the return sequence. */
6657 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6658 }
6659 return off;
6660}
6661
6662
6663/**
6664 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6665 */
6666static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6667{
6668 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6669 if (idxLabel != UINT32_MAX)
6670 {
6671 iemNativeLabelDefine(pReNative, idxLabel, off);
6672
6673 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6674
6675 /* jump back to the return sequence. */
6676 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6677 }
6678 return off;
6679}
6680
6681
6682/**
6683 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6684 */
6685static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6686{
6687 /*
6688 * Generate the rc + rcPassUp fiddling code if needed.
6689 */
6690 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6691 if (idxLabel != UINT32_MAX)
6692 {
6693 iemNativeLabelDefine(pReNative, idxLabel, off);
6694
6695 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6696#ifdef RT_ARCH_AMD64
6697# ifdef RT_OS_WINDOWS
6698# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6699 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6700# endif
6701 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6702 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6703# else
6704 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6705 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6706# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6707 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6708# endif
6709# endif
6710# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6711 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6712# endif
6713
6714#else
6715 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6716 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6717 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6718#endif
6719
6720 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6721 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6722 }
6723 return off;
6724}
6725
6726
6727/**
6728 * Emits a standard epilog.
6729 */
6730static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6731{
6732 *pidxReturnLabel = UINT32_MAX;
6733
6734 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6735 off = iemNativeRegFlushPendingWrites(pReNative, off);
6736
6737 /*
6738 * Successful return, so clear the return register (eax, w0).
6739 */
6740 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6741
6742 /*
6743 * Define label for common return point.
6744 */
6745 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6746 *pidxReturnLabel = idxReturn;
6747
6748 /*
6749 * Restore registers and return.
6750 */
6751#ifdef RT_ARCH_AMD64
6752 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6753
6754 /* Reposition esp at the r15 restore point. */
6755 pbCodeBuf[off++] = X86_OP_REX_W;
6756 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6757 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6758 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6759
6760 /* Pop non-volatile registers and return */
6761 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6762 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6763 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6764 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6765 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6766 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6767 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6768 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6769# ifdef RT_OS_WINDOWS
6770 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6771 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6772# endif
6773 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6774 pbCodeBuf[off++] = 0xc9; /* leave */
6775 pbCodeBuf[off++] = 0xc3; /* ret */
6776 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6777
6778#elif RT_ARCH_ARM64
6779 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6780
6781 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6782 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6783 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6784 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6785 IEMNATIVE_FRAME_VAR_SIZE / 8);
6786 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6787 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6788 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6789 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6790 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6791 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6792 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6793 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6794 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6795 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6796 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6797 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6798
6799 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6800 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6801 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6802 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6803
6804 /* retab / ret */
6805# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6806 if (1)
6807 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6808 else
6809# endif
6810 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6811
6812#else
6813# error "port me"
6814#endif
6815 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6816
6817 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6818}
6819
6820
6821/**
6822 * Emits a standard prolog.
6823 */
6824static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6825{
6826#ifdef RT_ARCH_AMD64
6827 /*
6828 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6829 * reserving 64 bytes for stack variables plus 4 non-register argument
6830 * slots. Fixed register assignment: xBX = pVCpu;
6831 *
6832 * Since we always do the same register spilling, we can use the same
6833 * unwind description for all the code.
6834 */
6835 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6836 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6837 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6838 pbCodeBuf[off++] = 0x8b;
6839 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6840 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6841 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6842# ifdef RT_OS_WINDOWS
6843 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6844 pbCodeBuf[off++] = 0x8b;
6845 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6846 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6847 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6848# else
6849 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6850 pbCodeBuf[off++] = 0x8b;
6851 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6852# endif
6853 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6854 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6855 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6856 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6857 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6858 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6859 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6860 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6861
6862# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6863 /* Save the frame pointer. */
6864 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6865# endif
6866
6867 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6868 X86_GREG_xSP,
6869 IEMNATIVE_FRAME_ALIGN_SIZE
6870 + IEMNATIVE_FRAME_VAR_SIZE
6871 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6872 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6873 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6874 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6875 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6876
6877#elif RT_ARCH_ARM64
6878 /*
6879 * We set up a stack frame exactly like on x86, only we have to push the
6880 * return address ourselves here. We save all non-volatile registers.
6881 */
6882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6883
6884 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we've been unable
6885 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6886 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
6887 * in any way conditional, so just emit this instruction now and hope for the best... */
6888 /* pacibsp */
6889 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6890# endif
6891
6892 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6893 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6894 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6895 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6896 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6897 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6898 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6899 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6900 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6901 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6902 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6903 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6904 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6905 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6906 /* Save the BP and LR (ret address) registers at the top of the frame. */
6907 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6908 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6909 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6910 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6911 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6912 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6913
6914 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6915 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6916
6917 /* mov r28, r0 */
6918 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6919 /* mov r27, r1 */
6920 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6921
6922# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6923 /* Save the frame pointer. */
6924 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6925 ARMV8_A64_REG_X2);
6926# endif
6927
6928#else
6929# error "port me"
6930#endif
6931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6932 return off;
6933}
6934
6935
6936/*********************************************************************************************************************************
6937* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6938*********************************************************************************************************************************/
6939
6940/**
6941 * Internal work that allocates a variable with kind set to
6942 * kIemNativeVarKind_Invalid and no current stack allocation.
6943 *
6944 * The kind will either be set by the caller or later when the variable is first
6945 * assigned a value.
6946 *
6947 * @returns Unpacked index.
6948 * @internal
6949 */
6950static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6951{
6952 Assert(cbType > 0 && cbType <= 64);
6953 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6954 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6955 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6956 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6957 pReNative->Core.aVars[idxVar].cbVar = cbType;
6958 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6959 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6960 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6961 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6962 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6963 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6964 pReNative->Core.aVars[idxVar].u.uValue = 0;
6965 return idxVar;
6966}
6967
6968
6969/**
6970 * Internal work that allocates an argument variable w/o setting enmKind.
6971 *
6972 * @returns Unpacked index.
6973 * @internal
6974 */
6975static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6976{
6977 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6978 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6979 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6980
6981 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6982 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6983 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6984 return idxVar;
6985}
6986
6987
6988/**
6989 * Gets the stack slot for a stack variable, allocating one if necessary.
6990 *
6991 * Calling this function implies that the stack slot will contain a valid
6992 * variable value. The caller deals with any register currently assigned to the
6993 * variable, typically by spilling it into the stack slot.
6994 *
6995 * @returns The stack slot number.
6996 * @param pReNative The recompiler state.
6997 * @param idxVar The variable.
6998 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6999 */
7000DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7001{
7002 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7003 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7004 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7005
7006 /* Already got a slot? */
7007 uint8_t const idxStackSlot = pVar->idxStackSlot;
7008 if (idxStackSlot != UINT8_MAX)
7009 {
7010 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7011 return idxStackSlot;
7012 }
7013
7014 /*
7015 * A single slot is easy to allocate.
7016 * Allocate them from the top end, closest to BP, to reduce the displacement.
7017 */
7018 if (pVar->cbVar <= sizeof(uint64_t))
7019 {
7020 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7021 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7022 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7023 pVar->idxStackSlot = (uint8_t)iSlot;
7024         Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7025 return (uint8_t)iSlot;
7026 }
7027
7028 /*
7029 * We need more than one stack slot.
7030 *
7031 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7032 */
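    /* Worked example (illustrative): for cbVar = 32 the masks below come out as
           fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3
           fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1            = RT_BIT_32(4) - 1 = 0xf
       so the loop searches for four consecutive free slots starting at a slot index
       that is a multiple of four. */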
7033 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7034 Assert(pVar->cbVar <= 64);
7035 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7036 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7037 uint32_t bmStack = ~pReNative->Core.bmStack;
7038 while (bmStack != UINT32_MAX)
7039 {
7040/** @todo allocate from the top to reduce BP displacement. */
7041 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7042 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7043 if (!(iSlot & fBitAlignMask))
7044 {
7045 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7046 {
7047 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7048 pVar->idxStackSlot = (uint8_t)iSlot;
7049                 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7050                        idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7051 return (uint8_t)iSlot;
7052 }
7053 }
7054 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7055 }
7056 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7057}
7058
7059
7060/**
7061 * Changes the variable to a stack variable.
7062 *
7063  * Currently this is only possible to do the first time the variable is used;
7064  * switching later can be implemented but is not done.
7065 *
7066 * @param pReNative The recompiler state.
7067 * @param idxVar The variable.
7068 * @throws VERR_IEM_VAR_IPE_2
7069 */
7070DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7071{
7072 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7073 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7074 if (pVar->enmKind != kIemNativeVarKind_Stack)
7075 {
7076 /* We could in theory transition from immediate to stack as well, but it
7077 would involve the caller doing work storing the value on the stack. So,
7078 till that's required we only allow transition from invalid. */
7079 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7080 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7081 pVar->enmKind = kIemNativeVarKind_Stack;
7082
7083 /* Note! We don't allocate a stack slot here, that's only done when a
7084 slot is actually needed to hold a variable value. */
7085 }
7086}
7087
7088
7089/**
7090  * Sets the variable to a constant (immediate) value.
7091 *
7092 * This does not require stack storage as we know the value and can always
7093 * reload it, unless of course it's referenced.
7094 *
7095 * @param pReNative The recompiler state.
7096 * @param idxVar The variable.
7097 * @param uValue The immediate value.
7098 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7099 */
7100DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7101{
7102 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7103 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7104 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7105 {
7106 /* Only simple transitions for now. */
7107 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7108 pVar->enmKind = kIemNativeVarKind_Immediate;
7109 }
7110 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7111
7112 pVar->u.uValue = uValue;
7113 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7114 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7115 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7116}
7117
7118
7119/**
7120 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7121 *
7122 * This does not require stack storage as we know the value and can always
7123 * reload it. Loading is postponed till needed.
7124 *
7125 * @param pReNative The recompiler state.
7126 * @param idxVar The variable. Unpacked.
7127 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7128 *
7129 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7130 * @internal
7131 */
7132static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7133{
7134 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7135 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7136
7137 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7138 {
7139 /* Only simple transitions for now. */
7140 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7141 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7142 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7143 }
7144 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7145
7146 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7147
7148 /* Update the other variable, ensure it's a stack variable. */
7149 /** @todo handle variables with const values... that'll go boom now. */
7150 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7151 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7152}
7153
7154
7155/**
7156 * Sets the variable to a reference (pointer) to a guest register reference.
7157 *
7158 * This does not require stack storage as we know the value and can always
7159 * reload it. Loading is postponed till needed.
7160 *
7161 * @param pReNative The recompiler state.
7162 * @param idxVar The variable.
7163  * @param   enmRegClass The class of guest registers to reference.
7164 * @param idxReg The register within @a enmRegClass to reference.
7165 *
7166 * @throws VERR_IEM_VAR_IPE_2
7167 */
7168DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7169 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7170{
7171 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7172 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7173
7174 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7175 {
7176 /* Only simple transitions for now. */
7177 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7178 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7179 }
7180 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7181
7182 pVar->u.GstRegRef.enmClass = enmRegClass;
7183 pVar->u.GstRegRef.idx = idxReg;
7184}
7185
7186
7187DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7188{
7189 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7190}
7191
7192
7193DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7194{
7195 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7196
7197     /* Since we're using a generic uint64_t value type, we must truncate it if
7198        the variable is smaller, otherwise we may end up with a too large value
7199        when scaling up an imm8 w/ sign-extension.
7200
7201        This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7202        in the BIOS, bx=1) when running on ARM, because clang expects 16-bit
7203        register parameters to have bits 16 and up set to zero. Instead of
7204        setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7205        CF value in the result. */
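    /* Illustrative example: for cbType == sizeof(uint16_t) the masking below turns a
       sign-extended 0xffff (i.e. UINT64_MAX) back into 0x000000000000ffff, so the
       16-bit argument is handed over with bits 16 and up cleared as clang expects. */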
7206 switch (cbType)
7207 {
7208 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7209 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7210 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7211 }
7212 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7213 return idxVar;
7214}
7215
7216
7217DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7218{
7219 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7220 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7221 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7222 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7223 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7224 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7225
7226 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7227 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7228 return idxArgVar;
7229}
7230
7231
7232DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7233{
7234 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7235 /* Don't set to stack now, leave that to the first use as for instance
7236 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7237 return idxVar;
7238}
7239
7240
7241DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7242{
7243 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7244
7245     /* Since we're using a generic uint64_t value type, we must truncate it if
7246        the variable is smaller, otherwise we may end up with a too large value
7247        when scaling up an imm8 w/ sign-extension. */
7248 switch (cbType)
7249 {
7250 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7251 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7252 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7253 }
7254 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7255 return idxVar;
7256}
7257
7258
7259/**
7260 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7261 * fixed till we call iemNativeVarRegisterRelease.
7262 *
7263 * @returns The host register number.
7264 * @param pReNative The recompiler state.
7265 * @param idxVar The variable.
7266 * @param poff Pointer to the instruction buffer offset.
7267 * In case a register needs to be freed up or the value
7268 * loaded off the stack.
7269 * @param fInitialized Set if the variable must already have been initialized.
7270 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7271 * the case.
7272 * @param idxRegPref Preferred register number or UINT8_MAX.
7273 */
7274DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7275 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7276{
7277 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7278 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7279 Assert(pVar->cbVar <= 8);
7280 Assert(!pVar->fRegAcquired);
7281
7282 uint8_t idxReg = pVar->idxReg;
7283 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7284 {
7285 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7286 && pVar->enmKind < kIemNativeVarKind_End);
7287 pVar->fRegAcquired = true;
7288 return idxReg;
7289 }
7290
7291 /*
7292 * If the kind of variable has not yet been set, default to 'stack'.
7293 */
7294 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7295 && pVar->enmKind < kIemNativeVarKind_End);
7296 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7297 iemNativeVarSetKindToStack(pReNative, idxVar);
7298
7299 /*
7300      * We have to allocate a register for the variable, even if it's a stack one,
7301      * as we don't know if there are modifications being made to it before it's
7302      * finalized (todo: analyze and insert hints about that?).
7303      *
7304      * If we can, we try to get the correct register for argument variables. This
7305      * assumes that most argument variables are fetched as close as possible
7306      * to the actual call, so that there aren't any interfering hidden calls
7307      * (memory accesses, etc.) in between.
7308      *
7309      * If we cannot, or it's a local variable, we make sure no argument registers
7310      * that will be used by this MC block are allocated here, and we always
7311      * prefer non-volatile registers to avoid needing to spill stuff for internal
7312      * calls.
7313 */
7314 /** @todo Detect too early argument value fetches and warn about hidden
7315 * calls causing less optimal code to be generated in the python script. */
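    /* Summary of the register selection below (descriptive, mirrors the code):
           1. The variable is an argument and its designated call register is free
              -> take that call register.
           2. No usable preference (idxRegPref invalid or already in use)
              -> pick a free non-argument register, preferring the non-volatile ones
                 at the top, or let iemNativeRegAllocFindFree make room.
           3. Otherwise
              -> use the preferred register (idxRegPref). */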
7316
7317 uint8_t const uArgNo = pVar->uArgNo;
7318 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7319 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7320 {
7321 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7322 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7323 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7324 }
7325 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7326 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7327 {
7328 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7329 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7330 & ~pReNative->Core.bmHstRegsWithGstShadow
7331 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7332 & fNotArgsMask;
7333 if (fRegs)
7334 {
7335 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7336 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7337 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7338 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7339 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7340 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7341 }
7342 else
7343 {
7344 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7345 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7346 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7347 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7348 }
7349 }
7350 else
7351 {
7352 idxReg = idxRegPref;
7353 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7354 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7355 }
7356 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7357 pVar->idxReg = idxReg;
7358
7359 /*
7360 * Load it off the stack if we've got a stack slot.
7361 */
7362 uint8_t const idxStackSlot = pVar->idxStackSlot;
7363 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7364 {
7365 Assert(fInitialized);
7366 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7367 switch (pVar->cbVar)
7368 {
7369 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7370 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7371 case 3: AssertFailed(); RT_FALL_THRU();
7372 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7373 default: AssertFailed(); RT_FALL_THRU();
7374 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7375 }
7376 }
7377 else
7378 {
7379 Assert(idxStackSlot == UINT8_MAX);
7380 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7381 }
7382 pVar->fRegAcquired = true;
7383 return idxReg;
7384}
7385
7386
7387/**
7388 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7389 * guest register.
7390 *
7391 * This function makes sure there is a register for it and sets it to be the
7392 * current shadow copy of @a enmGstReg.
7393 *
7394 * @returns The host register number.
7395 * @param pReNative The recompiler state.
7396 * @param idxVar The variable.
7397 * @param enmGstReg The guest register this variable will be written to
7398 * after this call.
7399 * @param poff Pointer to the instruction buffer offset.
7400 * In case a register needs to be freed up or if the
7401 * variable content needs to be loaded off the stack.
7402 *
7403 * @note We DO NOT expect @a idxVar to be an argument variable,
7404  *      because this function is only used in the commit stage of an
7405  *      instruction.
7406 */
7407DECL_HIDDEN_THROW(uint8_t)
7408iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7409{
7410 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7411 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7412 Assert(!pVar->fRegAcquired);
7413 AssertMsgStmt( pVar->cbVar <= 8
7414 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7415 || pVar->enmKind == kIemNativeVarKind_Stack),
7416 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7417 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7418 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7419
7420 /*
7421 * This shouldn't ever be used for arguments, unless it's in a weird else
7422 * branch that doesn't do any calling and even then it's questionable.
7423 *
7424 * However, in case someone writes crazy wrong MC code and does register
7425 * updates before making calls, just use the regular register allocator to
7426 * ensure we get a register suitable for the intended argument number.
7427 */
7428 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7429
7430 /*
7431 * If there is already a register for the variable, we transfer/set the
7432 * guest shadow copy assignment to it.
7433 */
7434 uint8_t idxReg = pVar->idxReg;
7435 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7436 {
7437 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7438 {
7439 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7440 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7441 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7442 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7443 }
7444 else
7445 {
7446 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7447 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7448 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7449 }
7450 /** @todo figure this one out. We need some way of making sure the register isn't
7451 * modified after this point, just in case we start writing crappy MC code. */
7452 pVar->enmGstReg = enmGstReg;
7453 pVar->fRegAcquired = true;
7454 return idxReg;
7455 }
7456 Assert(pVar->uArgNo == UINT8_MAX);
7457
7458 /*
7459      * Because this is supposed to be the commit stage, we just tag along with the
7460 * temporary register allocator and upgrade it to a variable register.
7461 */
7462 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7463 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7464 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7465 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7466 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7467 pVar->idxReg = idxReg;
7468
7469 /*
7470 * Now we need to load the register value.
7471 */
7472 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7473 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7474 else
7475 {
7476 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7477 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7478 switch (pVar->cbVar)
7479 {
7480 case sizeof(uint64_t):
7481 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7482 break;
7483 case sizeof(uint32_t):
7484 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7485 break;
7486 case sizeof(uint16_t):
7487 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7488 break;
7489 case sizeof(uint8_t):
7490 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7491 break;
7492 default:
7493 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7494 }
7495 }
7496
7497 pVar->fRegAcquired = true;
7498 return idxReg;
7499}
7500
7501
7502/**
7503 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7504 *
7505 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7506 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7507 * requirement of flushing anything in volatile host registers when making a
7508 * call.
7509 *
7510 * @returns New @a off value.
7511 * @param pReNative The recompiler state.
7512 * @param off The code buffer position.
7513 * @param fHstRegsNotToSave Set of registers not to save & restore.
7514 */
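/* Typical usage (illustrative sketch only; the surrounding emitter calls vary by caller):
       off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
       ... emit the actual helper call ...
       off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
   optionally followed by iemNativeRegRestoreGuestShadowsInVolatileRegs(). */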
7515DECL_HIDDEN_THROW(uint32_t)
7516iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7517{
7518 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7519 if (fHstRegs)
7520 {
7521 do
7522 {
7523 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7524 fHstRegs &= ~RT_BIT_32(idxHstReg);
7525
7526 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7527 {
7528 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7529 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7530 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7531 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7532 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7533 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7534 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7535 {
7536 case kIemNativeVarKind_Stack:
7537 {
7538 /* Temporarily spill the variable register. */
7539 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7540 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7541 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7542 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7543 continue;
7544 }
7545
7546 case kIemNativeVarKind_Immediate:
7547 case kIemNativeVarKind_VarRef:
7548 case kIemNativeVarKind_GstRegRef:
7549 /* It is weird to have any of these loaded at this point. */
7550 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7551 continue;
7552
7553 case kIemNativeVarKind_End:
7554 case kIemNativeVarKind_Invalid:
7555 break;
7556 }
7557 AssertFailed();
7558 }
7559 else
7560 {
7561 /*
7562 * Allocate a temporary stack slot and spill the register to it.
7563 */
7564 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7565 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7566 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7567 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7568 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7569 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7570 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7571 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7572 }
7573 } while (fHstRegs);
7574 }
7575 return off;
7576}
7577
7578
7579/**
7580  * Emit code to restore volatile registers after a call to a helper.
7581 *
7582 * @returns New @a off value.
7583 * @param pReNative The recompiler state.
7584 * @param off The code buffer position.
7585 * @param fHstRegsNotToSave Set of registers not to save & restore.
7586 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7587 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7588 */
7589DECL_HIDDEN_THROW(uint32_t)
7590iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7591{
7592 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7593 if (fHstRegs)
7594 {
7595 do
7596 {
7597 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7598 fHstRegs &= ~RT_BIT_32(idxHstReg);
7599
7600 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7601 {
7602 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7604 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7605 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7606 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7607 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7608 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7609 {
7610 case kIemNativeVarKind_Stack:
7611 {
7612 /* Unspill the variable register. */
7613 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7614 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7615 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7616 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7617 continue;
7618 }
7619
7620 case kIemNativeVarKind_Immediate:
7621 case kIemNativeVarKind_VarRef:
7622 case kIemNativeVarKind_GstRegRef:
7623 /* It is weird to have any of these loaded at this point. */
7624 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7625 continue;
7626
7627 case kIemNativeVarKind_End:
7628 case kIemNativeVarKind_Invalid:
7629 break;
7630 }
7631 AssertFailed();
7632 }
7633 else
7634 {
7635 /*
7636 * Restore from temporary stack slot.
7637 */
7638 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7639 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7640 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7641 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7642
7643 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7644 }
7645 } while (fHstRegs);
7646 }
7647 return off;
7648}
7649
7650
7651/**
7652 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7653 *
7654 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7655 *
7656 * ASSUMES that @a idxVar is valid and unpacked.
7657 */
7658DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7659{
7660 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7661 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7662 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7663 {
7664 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7665 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7666 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
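        /* Illustrative example: a 32 byte variable gives cSlots = 4 and fAllocMask = 0xf,
           matching the alignment/allocation masks used by iemNativeVarGetStackSlot. */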
7667 Assert(cSlots > 0);
7668 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7669 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7670 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7671 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7672 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7673 }
7674 else
7675 Assert(idxStackSlot == UINT8_MAX);
7676}
7677
7678
7679/**
7680 * Worker that frees a single variable.
7681 *
7682 * ASSUMES that @a idxVar is valid and unpacked.
7683 */
7684DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7685{
7686 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7687 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7688 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7689
7690 /* Free the host register first if any assigned. */
7691 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7692 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7693 {
7694 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7695 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7696 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7697 }
7698
7699 /* Free argument mapping. */
7700 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7701 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7702 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7703
7704 /* Free the stack slots. */
7705 iemNativeVarFreeStackSlots(pReNative, idxVar);
7706
7707 /* Free the actual variable. */
7708 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7709 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7710}
7711
7712
7713/**
7714 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7715 */
7716DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7717{
7718 while (bmVars != 0)
7719 {
7720 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7721 bmVars &= ~RT_BIT_32(idxVar);
7722
7723#if 1 /** @todo optimize by simplifying this later... */
7724 iemNativeVarFreeOneWorker(pReNative, idxVar);
7725#else
7726 /* Only need to free the host register, the rest is done as bulk updates below. */
7727 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7728 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7729 {
7730 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7731 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7732 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7733 }
7734#endif
7735 }
7736#if 0 /** @todo optimize by simplifying this later... */
7737 pReNative->Core.bmVars = 0;
7738 pReNative->Core.bmStack = 0;
7739 pReNative->Core.u64ArgVars = UINT64_MAX;
7740#endif
7741}
7742
7743
7744
7745/*********************************************************************************************************************************
7746* Emitters for IEM_MC_CALL_CIMPL_XXX *
7747*********************************************************************************************************************************/
7748
7749/**
7750 * Emits code to load a reference to the given guest register into @a idxGprDst.
7751 */
7752DECL_INLINE_THROW(uint32_t)
7753iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7754 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7755{
7756#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7757 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */
7758#endif
7759
7760 /*
7761 * Get the offset relative to the CPUMCTX structure.
7762 */
7763 uint32_t offCpumCtx;
7764 switch (enmClass)
7765 {
7766 case kIemNativeGstRegRef_Gpr:
7767 Assert(idxRegInClass < 16);
7768 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7769 break;
7770
7771 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7772 Assert(idxRegInClass < 4);
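            /* Descriptive note: AH/CH/DH/BH live in byte 1 (bHi) of the first four GPR
               entries, hence aGRegs[0].bHi plus one whole CPUMCTXGREG per register. */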
7773 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7774 break;
7775
7776 case kIemNativeGstRegRef_EFlags:
7777 Assert(idxRegInClass == 0);
7778 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7779 break;
7780
7781 case kIemNativeGstRegRef_MxCsr:
7782 Assert(idxRegInClass == 0);
7783 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7784 break;
7785
7786 case kIemNativeGstRegRef_FpuReg:
7787 Assert(idxRegInClass < 8);
7788 AssertFailed(); /** @todo what kind of indexing? */
7789 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7790 break;
7791
7792 case kIemNativeGstRegRef_MReg:
7793 Assert(idxRegInClass < 8);
7794 AssertFailed(); /** @todo what kind of indexing? */
7795 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7796 break;
7797
7798 case kIemNativeGstRegRef_XReg:
7799 Assert(idxRegInClass < 16);
7800 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7801 break;
7802
7803 default:
7804 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7805 }
7806
7807 /*
7808 * Load the value into the destination register.
7809 */
7810#ifdef RT_ARCH_AMD64
7811 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7812
7813#elif defined(RT_ARCH_ARM64)
7814 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7815 Assert(offCpumCtx < 4096);
7816 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7817
7818#else
7819# error "Port me!"
7820#endif
7821
7822 return off;
7823}
7824
7825
7826/**
7827 * Common code for CIMPL and AIMPL calls.
7828 *
7829  * These are calls that use argument variables and such.  They should not be
7830 * confused with internal calls required to implement an MC operation,
7831 * like a TLB load and similar.
7832 *
7833 * Upon return all that is left to do is to load any hidden arguments and
7834 * perform the call. All argument variables are freed.
7835 *
7836 * @returns New code buffer offset; throws VBox status code on error.
7837 * @param pReNative The native recompile state.
7838 * @param off The code buffer offset.
7839  * @param   cArgs           The total number of arguments (includes hidden
7840 * count).
7841 * @param cHiddenArgs The number of hidden arguments. The hidden
7842 * arguments must not have any variable declared for
7843 * them, whereas all the regular arguments must
7844 * (tstIEMCheckMc ensures this).
7845 */
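/* Rough flow of the function below (descriptive summary):
       1. Flush any pending guest register writes.
       2. Spill referenced variables out of host registers.
       3. Make sure each argument call register either holds its argument or is free.
       4. With IEMNATIVE_FP_OFF_STACK_ARG0: store the stack-passed arguments.
       5. Load the remaining argument variables into their call registers.
       6. Free the argument variables and flush volatile registers for the call. */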
7846DECL_HIDDEN_THROW(uint32_t)
7847iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7848{
7849#ifdef VBOX_STRICT
7850 /*
7851 * Assert sanity.
7852 */
7853 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7854 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7855 for (unsigned i = 0; i < cHiddenArgs; i++)
7856 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7857 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7858 {
7859 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7860 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7861 }
7862 iemNativeRegAssertSanity(pReNative);
7863#endif
7864
7865 /* We don't know what the called function makes use of, so flush any pending register writes. */
7866 off = iemNativeRegFlushPendingWrites(pReNative, off);
7867
7868 /*
7869 * Before we do anything else, go over variables that are referenced and
7870 * make sure they are not in a register.
7871 */
7872 uint32_t bmVars = pReNative->Core.bmVars;
7873 if (bmVars)
7874 {
7875 do
7876 {
7877 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7878 bmVars &= ~RT_BIT_32(idxVar);
7879
7880 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7881 {
7882 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7883 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7884 {
7885 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7886 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7887 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7888 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7889 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7890
7891 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7892 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7893 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7894 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7895 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7896 }
7897 }
7898 } while (bmVars != 0);
7899#if 0 //def VBOX_STRICT
7900 iemNativeRegAssertSanity(pReNative);
7901#endif
7902 }
7903
7904 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7905
7906 /*
7907 * First, go over the host registers that will be used for arguments and make
7908 * sure they either hold the desired argument or are free.
7909 */
7910 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7911 {
7912 for (uint32_t i = 0; i < cRegArgs; i++)
7913 {
7914 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7915 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7916 {
7917 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7918 {
7919 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7920 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7921 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7922 Assert(pVar->idxReg == idxArgReg);
7923 uint8_t const uArgNo = pVar->uArgNo;
7924 if (uArgNo == i)
7925                     { /* perfect */ }
7926 /* The variable allocator logic should make sure this is impossible,
7927 except for when the return register is used as a parameter (ARM,
7928 but not x86). */
7929#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7930 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7931 {
7932# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7933# error "Implement this"
7934# endif
7935 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7936 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7937 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7938 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7939 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7940 }
7941#endif
7942 else
7943 {
7944 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7945
7946 if (pVar->enmKind == kIemNativeVarKind_Stack)
7947 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7948 else
7949 {
7950 /* just free it, can be reloaded if used again */
7951 pVar->idxReg = UINT8_MAX;
7952 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7953 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7954 }
7955 }
7956 }
7957 else
7958 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7959 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7960 }
7961 }
7962#if 0 //def VBOX_STRICT
7963 iemNativeRegAssertSanity(pReNative);
7964#endif
7965 }
7966
7967 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7968
7969#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7970 /*
7971 * If there are any stack arguments, make sure they are in their place as well.
7972 *
7973      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we'll (or
7974 * the caller) be loading it later and it must be free (see first loop).
7975 */
7976 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7977 {
7978 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7979 {
7980 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7981 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7982 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7983 {
7984 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7985 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7986 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7987 pVar->idxReg = UINT8_MAX;
7988 }
7989 else
7990 {
7991 /* Use ARG0 as temp for stuff we need registers for. */
7992 switch (pVar->enmKind)
7993 {
7994 case kIemNativeVarKind_Stack:
7995 {
7996 uint8_t const idxStackSlot = pVar->idxStackSlot;
7997 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7998 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7999 iemNativeStackCalcBpDisp(idxStackSlot));
8000 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8001 continue;
8002 }
8003
8004 case kIemNativeVarKind_Immediate:
8005 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8006 continue;
8007
8008 case kIemNativeVarKind_VarRef:
8009 {
8010 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8011 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8012 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8013 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8014 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8015 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8016 {
8017 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8018 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8019 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8020 }
8021 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8022 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8023 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8024 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8025 continue;
8026 }
8027
8028 case kIemNativeVarKind_GstRegRef:
8029 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8030 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8031 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8032 continue;
8033
8034 case kIemNativeVarKind_Invalid:
8035 case kIemNativeVarKind_End:
8036 break;
8037 }
8038 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8039 }
8040 }
8041# if 0 //def VBOX_STRICT
8042 iemNativeRegAssertSanity(pReNative);
8043# endif
8044 }
8045#else
8046 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8047#endif
8048
8049 /*
8050 * Make sure the argument variables are loaded into their respective registers.
8051 *
8052 * We can optimize this by ASSUMING that any register allocations are for
8053      * registers that have already been loaded and are ready.  The previous step
8054 * saw to that.
8055 */
8056 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8057 {
8058 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8059 {
8060 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8061 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8062 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8063 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8064 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8065 else
8066 {
8067 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8068 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8069 {
8070 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8071 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8072 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8073 | RT_BIT_32(idxArgReg);
8074 pVar->idxReg = idxArgReg;
8075 }
8076 else
8077 {
8078 /* Use ARG0 as temp for stuff we need registers for. */
8079 switch (pVar->enmKind)
8080 {
8081 case kIemNativeVarKind_Stack:
8082 {
8083 uint8_t const idxStackSlot = pVar->idxStackSlot;
8084 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8085 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8086 continue;
8087 }
8088
8089 case kIemNativeVarKind_Immediate:
8090 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8091 continue;
8092
8093 case kIemNativeVarKind_VarRef:
8094 {
8095 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8096 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8097 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8098 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8099 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8100 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8101 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8102 {
8103 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8104 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8105 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8106 }
8107 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8108 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8109 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8110 continue;
8111 }
8112
8113 case kIemNativeVarKind_GstRegRef:
8114 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8115 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8116 continue;
8117
8118 case kIemNativeVarKind_Invalid:
8119 case kIemNativeVarKind_End:
8120 break;
8121 }
8122 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8123 }
8124 }
8125 }
8126#if 0 //def VBOX_STRICT
8127 iemNativeRegAssertSanity(pReNative);
8128#endif
8129 }
8130#ifdef VBOX_STRICT
8131 else
8132 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8133 {
8134 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8135 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8136 }
8137#endif
8138
8139 /*
8140 * Free all argument variables (simplified).
8141 * Their lifetime always expires with the call they are for.
8142 */
8143 /** @todo Make the python script check that arguments aren't used after
8144 * IEM_MC_CALL_XXXX. */
8145     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8146      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8147      *        an argument value. There is also some FPU stuff. */
8148 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8149 {
8150 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8151 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8152
8153 /* no need to free registers: */
8154 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8155 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8156 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8157 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8158 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8159 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8160
8161 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8162 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8163 iemNativeVarFreeStackSlots(pReNative, idxVar);
8164 }
8165 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8166
8167 /*
8168 * Flush volatile registers as we make the call.
8169 */
8170 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8171
8172 return off;
8173}
8174
8175
8176
8177/*********************************************************************************************************************************
8178* TLB Lookup. *
8179*********************************************************************************************************************************/
8180
8181/**
8182 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8183 */
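/* uSegAndSizeAndAccess layout (as unpacked below): byte 0 = segment register index
   (UINT8_MAX for an already flat address), byte 1 = access size in bytes, and
   bits 16 and up = the IEM_ACCESS_XXX flags. */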
8184DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8185{
8186 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8187 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8188 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8189 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8190
8191 /* Do the lookup manually. */
8192 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8193 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8194 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8195 if (RT_LIKELY(pTlbe->uTag == uTag))
8196 {
8197 /*
8198 * Check TLB page table level access flags.
8199 */
8200 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
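        /* Descriptive note: with IEMTLBE_F_PT_NO_USER == 4, (CPL + 1) & 4 is non-zero only
           for CPL 3, so only user-mode accesses pick up the NO_USER check below. */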
8201 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8202 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8203 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8204 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8205 | IEMTLBE_F_PG_UNASSIGNED
8206 | IEMTLBE_F_PT_NO_ACCESSED
8207 | fNoWriteNoDirty | fNoUser);
8208 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8209 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8210 {
8211 /*
8212 * Return the address.
8213 */
8214 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8215 if ((uintptr_t)pbAddr == uResult)
8216 return;
8217 RT_NOREF(cbMem);
8218 AssertFailed();
8219 }
8220 else
8221 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8222 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8223 }
8224 else
8225 AssertFailed();
8226 RT_BREAKPOINT();
8227}
8228
8229/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8230
8231
8232
8233/*********************************************************************************************************************************
8234* Recompiler Core. *
8235*********************************************************************************************************************************/
8236
8237/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8238static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8239{
8240 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8241 pDis->cbCachedInstr += cbMaxRead;
8242 RT_NOREF(cbMinRead);
8243 return VERR_NO_DATA;
8244}
8245
8246
8247DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8248{
8249 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8250 {
8251#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8252 ENTRY(fLocalForcedActions),
8253 ENTRY(iem.s.rcPassUp),
8254 ENTRY(iem.s.fExec),
8255 ENTRY(iem.s.pbInstrBuf),
8256 ENTRY(iem.s.uInstrBufPc),
8257 ENTRY(iem.s.GCPhysInstrBuf),
8258 ENTRY(iem.s.cbInstrBufTotal),
8259 ENTRY(iem.s.idxTbCurInstr),
8260#ifdef VBOX_WITH_STATISTICS
8261 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8262 ENTRY(iem.s.StatNativeTlbHitsForStore),
8263 ENTRY(iem.s.StatNativeTlbHitsForStack),
8264 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8265 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8266 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8267 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8268 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8269#endif
8270 ENTRY(iem.s.DataTlb.aEntries),
8271 ENTRY(iem.s.DataTlb.uTlbRevision),
8272 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8273 ENTRY(iem.s.DataTlb.cTlbHits),
8274 ENTRY(iem.s.CodeTlb.aEntries),
8275 ENTRY(iem.s.CodeTlb.uTlbRevision),
8276 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8277 ENTRY(iem.s.CodeTlb.cTlbHits),
8278 ENTRY(pVMR3),
8279 ENTRY(cpum.GstCtx.rax),
8280 ENTRY(cpum.GstCtx.ah),
8281 ENTRY(cpum.GstCtx.rcx),
8282 ENTRY(cpum.GstCtx.ch),
8283 ENTRY(cpum.GstCtx.rdx),
8284 ENTRY(cpum.GstCtx.dh),
8285 ENTRY(cpum.GstCtx.rbx),
8286 ENTRY(cpum.GstCtx.bh),
8287 ENTRY(cpum.GstCtx.rsp),
8288 ENTRY(cpum.GstCtx.rbp),
8289 ENTRY(cpum.GstCtx.rsi),
8290 ENTRY(cpum.GstCtx.rdi),
8291 ENTRY(cpum.GstCtx.r8),
8292 ENTRY(cpum.GstCtx.r9),
8293 ENTRY(cpum.GstCtx.r10),
8294 ENTRY(cpum.GstCtx.r11),
8295 ENTRY(cpum.GstCtx.r12),
8296 ENTRY(cpum.GstCtx.r13),
8297 ENTRY(cpum.GstCtx.r14),
8298 ENTRY(cpum.GstCtx.r15),
8299 ENTRY(cpum.GstCtx.es.Sel),
8300 ENTRY(cpum.GstCtx.es.u64Base),
8301 ENTRY(cpum.GstCtx.es.u32Limit),
8302 ENTRY(cpum.GstCtx.es.Attr),
8303 ENTRY(cpum.GstCtx.cs.Sel),
8304 ENTRY(cpum.GstCtx.cs.u64Base),
8305 ENTRY(cpum.GstCtx.cs.u32Limit),
8306 ENTRY(cpum.GstCtx.cs.Attr),
8307 ENTRY(cpum.GstCtx.ss.Sel),
8308 ENTRY(cpum.GstCtx.ss.u64Base),
8309 ENTRY(cpum.GstCtx.ss.u32Limit),
8310 ENTRY(cpum.GstCtx.ss.Attr),
8311 ENTRY(cpum.GstCtx.ds.Sel),
8312 ENTRY(cpum.GstCtx.ds.u64Base),
8313 ENTRY(cpum.GstCtx.ds.u32Limit),
8314 ENTRY(cpum.GstCtx.ds.Attr),
8315 ENTRY(cpum.GstCtx.fs.Sel),
8316 ENTRY(cpum.GstCtx.fs.u64Base),
8317 ENTRY(cpum.GstCtx.fs.u32Limit),
8318 ENTRY(cpum.GstCtx.fs.Attr),
8319 ENTRY(cpum.GstCtx.gs.Sel),
8320 ENTRY(cpum.GstCtx.gs.u64Base),
8321 ENTRY(cpum.GstCtx.gs.u32Limit),
8322 ENTRY(cpum.GstCtx.gs.Attr),
8323 ENTRY(cpum.GstCtx.rip),
8324 ENTRY(cpum.GstCtx.eflags),
8325 ENTRY(cpum.GstCtx.uRipInhibitInt),
8326#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8327 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8328 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8329 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8330 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8331 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8332 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8333 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8334 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8335 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8336 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8337 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8338 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8339 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8340 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8341 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8342 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8343 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8344 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8345 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8346 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8347 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8348 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8349 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8350 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8351 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8352 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8353 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8354 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8355 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8356 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8357 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8358 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8359#endif
8360#undef ENTRY
8361 };
8362#ifdef VBOX_STRICT
8363 static bool s_fOrderChecked = false;
8364 if (!s_fOrderChecked)
8365 {
8366 s_fOrderChecked = true;
8367 uint32_t offPrev = s_aMembers[0].off;
8368 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8369 {
8370 Assert(s_aMembers[i].off > offPrev);
8371 offPrev = s_aMembers[i].off;
8372 }
8373 }
8374#endif
8375
8376 /*
8377 * Binary lookup.
8378 */
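    /* (The table is sorted by offset, as asserted above in strict builds, so a plain
       half-interval search over [iStart, iEnd) suffices.) */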
8379 unsigned iStart = 0;
8380 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8381 for (;;)
8382 {
8383 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8384 uint32_t const offCur = s_aMembers[iCur].off;
8385 if (off < offCur)
8386 {
8387 if (iCur != iStart)
8388 iEnd = iCur;
8389 else
8390 break;
8391 }
8392 else if (off > offCur)
8393 {
8394 if (iCur + 1 < iEnd)
8395 iStart = iCur + 1;
8396 else
8397 break;
8398 }
8399 else
8400 return s_aMembers[iCur].pszName;
8401 }
8402#ifdef VBOX_WITH_STATISTICS
8403 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8404 return "iem.s.acThreadedFuncStats[iFn]";
8405#endif
8406 return NULL;
8407}
8408
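/* A minimal usage sketch (illustrative only, excluded from the build; the function
   name and the member picked are made up for the example): resolving a VMCPU-relative
   offset to a member name for annotation purposes. */
#if 0
static void iemNativeDbgExamplePrintOffsetName(PCDBGFINFOHLP pHlp)
{
    uint32_t const  offVCpu = (uint32_t)RT_UOFFSETOF(VMCPUCC, iem.s.fExec); /* example member */
    const char     *pszName = iemNativeDbgVCpuOffsetToName(offVCpu);
    pHlp->pfnPrintf(pHlp, "off=%#x -> %s\n", offVCpu, pszName ? pszName : "<unknown>");
}
#endif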
8409
8410/**
8411 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
8412 * @returns pszBuf.
8413 * @param fFlags The flags.
8414 * @param pszBuf The output buffer.
8415 * @param cbBuf The output buffer size. At least 32 bytes.
8416 */
8417DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
8418{
8419 Assert(cbBuf >= 32);
8420 static RTSTRTUPLE const s_aModes[] =
8421 {
8422 /* [00] = */ { RT_STR_TUPLE("16BIT") },
8423 /* [01] = */ { RT_STR_TUPLE("32BIT") },
8424 /* [02] = */ { RT_STR_TUPLE("!2!") },
8425 /* [03] = */ { RT_STR_TUPLE("!3!") },
8426 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
8427 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
8428 /* [06] = */ { RT_STR_TUPLE("!6!") },
8429 /* [07] = */ { RT_STR_TUPLE("!7!") },
8430 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
8431 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
8432 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
8433 /* [0b] = */ { RT_STR_TUPLE("!b!") },
8434 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
8435 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
8436 /* [0e] = */ { RT_STR_TUPLE("!e!") },
8437 /* [0f] = */ { RT_STR_TUPLE("!f!") },
8438 /* [10] = */ { RT_STR_TUPLE("!10!") },
8439 /* [11] = */ { RT_STR_TUPLE("!11!") },
8440 /* [12] = */ { RT_STR_TUPLE("!12!") },
8441 /* [13] = */ { RT_STR_TUPLE("!13!") },
8442 /* [14] = */ { RT_STR_TUPLE("!14!") },
8443 /* [15] = */ { RT_STR_TUPLE("!15!") },
8444 /* [16] = */ { RT_STR_TUPLE("!16!") },
8445 /* [17] = */ { RT_STR_TUPLE("!17!") },
8446 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
8447 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
8448 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
8449 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
8450 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
8451 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
8452 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
8453 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
8454 };
8455 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
8456 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
8457 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
8458
8459 pszBuf[off++] = ' ';
8460 pszBuf[off++] = 'C';
8461 pszBuf[off++] = 'P';
8462 pszBuf[off++] = 'L';
8463 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
8464 Assert(off < 32);
8465
8466 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
8467
8468 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
8469 {
8470 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
8471 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
8472 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
8473 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
8474 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
8475 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
8476 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
8477 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
8478 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
8479 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
8480 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
8481 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
8482 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
8483 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
8484 };
8485 if (fFlags)
8486 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
8487 if (s_aFlags[i].fFlag & fFlags)
8488 {
8489 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
8490 pszBuf[off++] = ' ';
8491 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
8492 off += s_aFlags[i].cchName;
8493 fFlags &= ~s_aFlags[i].fFlag;
8494 if (!fFlags)
8495 break;
8496 }
8497 pszBuf[off] = '\0';
8498
8499 return pszBuf;
8500}
8501
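/* A small usage sketch (illustrative only, excluded from the build; the function name
   is made up): formatting a TB's flags into a stack buffer for printing. */
#if 0
static void iemNativeDbgExamplePrintTbFlags(PCIEMTB pTb, PCDBGFINFOHLP pHlp)
{
    char szFlags[128]; /* must be at least 32 bytes per the API contract above */
    pHlp->pfnPrintf(pHlp, "fFlags=%#010x %s\n",
                    pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szFlags, sizeof(szFlags)));
}
#endif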
8502
8503DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8504{
8505 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8506#if defined(RT_ARCH_AMD64)
8507 static const char * const a_apszMarkers[] =
8508 {
8509 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8510 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8511 };
8512#endif
8513
8514 char szDisBuf[512];
8515 DISSTATE Dis;
8516 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8517 uint32_t const cNative = pTb->Native.cInstructions;
8518 uint32_t offNative = 0;
8519#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8520 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8521#endif
8522 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8523 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8524 : DISCPUMODE_64BIT;
8525#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8526 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8527#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8528 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8529#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8530# error "Port me"
8531#else
8532 csh hDisasm = ~(size_t)0;
8533# if defined(RT_ARCH_AMD64)
8534 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8535# elif defined(RT_ARCH_ARM64)
8536 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8537# else
8538# error "Port me"
8539# endif
8540 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8541
8542 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8543 //Assert(rcCs == CS_ERR_OK);
8544#endif
8545
8546 /*
8547 * Print TB info.
8548 */
8549 pHlp->pfnPrintf(pHlp,
8550 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8551 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8552 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8553 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8554#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8555 if (pDbgInfo && pDbgInfo->cEntries > 1)
8556 {
8557 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8558
8559 /*
8560 * This disassembly is driven by the debug info which follows the native
8561 * code and indicates where the next guest instruction starts, where labels
8562 * are placed, and so on.
8563 */
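        /* Conceptually the entry stream interleaves the types handled below, e.g.
           (offsets made up):
               NativeOffset(0), GuestInstruction(fExec), ThreadedCall(...),
               NativeOffset(12), Label(...), GuestRegShadowing(...), ...
           Each NativeOffset entry gives the native instruction at which the entries
           following it take effect; that is what offDbgNativeNext tracks below. */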
8564 uint32_t idxThreadedCall = 0;
8565 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8566 uint8_t idxRange = UINT8_MAX;
8567 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8568 uint32_t offRange = 0;
8569 uint32_t offOpcodes = 0;
8570 uint32_t const cbOpcodes = pTb->cbOpcodes;
8571 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8572 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8573 uint32_t iDbgEntry = 1;
8574 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8575
8576 while (offNative < cNative)
8577 {
8578 /* If we're at or have passed the point where the next chunk of debug
8579 info starts, process it. */
8580 if (offDbgNativeNext <= offNative)
8581 {
8582 offDbgNativeNext = UINT32_MAX;
8583 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8584 {
8585 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8586 {
8587 case kIemTbDbgEntryType_GuestInstruction:
8588 {
8589 /* Did the exec flag change? */
8590 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8591 {
8592 pHlp->pfnPrintf(pHlp,
8593 " fExec change %#08x -> %#08x %s\n",
8594 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8595 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8596 szDisBuf, sizeof(szDisBuf)));
8597 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8598 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8599 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8600 : DISCPUMODE_64BIT;
8601 }
8602
8603 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8604 where the compilation was aborted before the opcode was recorded and the actual
8605 instruction was translated to a threaded call. This may happen when we run out
8606 of ranges, or when some complicated interrupts/FFs are found to be pending or
8607 similar. So, we just deal with it here rather than in the compiler code as it
8608 is a lot simpler to do here. */
8609 if ( idxRange == UINT8_MAX
8610 || idxRange >= cRanges
8611 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8612 {
8613 idxRange += 1;
8614 if (idxRange < cRanges)
8615 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8616 else
8617 continue;
8618 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8619 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8620 + (pTb->aRanges[idxRange].idxPhysPage == 0
8621 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8622 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8623 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8624 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8625 pTb->aRanges[idxRange].idxPhysPage);
8626 GCPhysPc += offRange;
8627 }
8628
8629 /* Disassemble the instruction. */
8630 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8631 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8632 uint32_t cbInstr = 1;
8633 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8634 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8635 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8636 if (RT_SUCCESS(rc))
8637 {
8638 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8639 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8640 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8641 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8642
8643 static unsigned const s_offMarker = 55;
8644 static char const s_szMarker[] = " ; <--- guest";
8645 if (cch < s_offMarker)
8646 {
8647 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8648 cch = s_offMarker;
8649 }
8650 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8651 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8652
8653 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8654 }
8655 else
8656 {
8657 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8658 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8659 cbInstr = 1;
8660 }
8661 GCPhysPc += cbInstr;
8662 offOpcodes += cbInstr;
8663 offRange += cbInstr;
8664 continue;
8665 }
8666
8667 case kIemTbDbgEntryType_ThreadedCall:
8668 pHlp->pfnPrintf(pHlp,
8669 " Call #%u to %s (%u args) - %s\n",
8670 idxThreadedCall,
8671 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8672 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8673 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8674 idxThreadedCall++;
8675 continue;
8676
8677 case kIemTbDbgEntryType_GuestRegShadowing:
8678 {
8679 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8680 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8681 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8682 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8683 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8684 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8685 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8686 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8687 else
8688 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8689 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8690 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8691 continue;
8692 }
8693
8694#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8695 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8696 {
8697 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8698 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8699 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8700 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8701 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8702 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8703 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8704 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8705 else
8706 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8707 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8708 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8709 continue;
8710 }
8711#endif
8712
8713 case kIemTbDbgEntryType_Label:
8714 {
8715 const char *pszName = "what_the_fudge";
8716 const char *pszComment = "";
8717 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8718 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8719 {
8720 case kIemNativeLabelType_Return:
8721 pszName = "Return";
8722 break;
8723 case kIemNativeLabelType_ReturnBreak:
8724 pszName = "ReturnBreak";
8725 break;
8726 case kIemNativeLabelType_ReturnWithFlags:
8727 pszName = "ReturnWithFlags";
8728 break;
8729 case kIemNativeLabelType_NonZeroRetOrPassUp:
8730 pszName = "NonZeroRetOrPassUp";
8731 break;
8732 case kIemNativeLabelType_RaiseGp0:
8733 pszName = "RaiseGp0";
8734 break;
8735 case kIemNativeLabelType_RaiseNm:
8736 pszName = "RaiseNm";
8737 break;
8738 case kIemNativeLabelType_RaiseUd:
8739 pszName = "RaiseUd";
8740 break;
8741 case kIemNativeLabelType_RaiseMf:
8742 pszName = "RaiseMf";
8743 break;
8744 case kIemNativeLabelType_RaiseXf:
8745 pszName = "RaiseXf";
8746 break;
8747 case kIemNativeLabelType_ObsoleteTb:
8748 pszName = "ObsoleteTb";
8749 break;
8750 case kIemNativeLabelType_NeedCsLimChecking:
8751 pszName = "NeedCsLimChecking";
8752 break;
8753 case kIemNativeLabelType_CheckBranchMiss:
8754 pszName = "CheckBranchMiss";
8755 break;
8756 case kIemNativeLabelType_If:
8757 pszName = "If";
8758 fNumbered = true;
8759 break;
8760 case kIemNativeLabelType_Else:
8761 pszName = "Else";
8762 fNumbered = true;
8763 pszComment = " ; regs state restored pre-if-block";
8764 break;
8765 case kIemNativeLabelType_Endif:
8766 pszName = "Endif";
8767 fNumbered = true;
8768 break;
8769 case kIemNativeLabelType_CheckIrq:
8770 pszName = "CheckIrq_CheckVM";
8771 fNumbered = true;
8772 break;
8773 case kIemNativeLabelType_TlbLookup:
8774 pszName = "TlbLookup";
8775 fNumbered = true;
8776 break;
8777 case kIemNativeLabelType_TlbMiss:
8778 pszName = "TlbMiss";
8779 fNumbered = true;
8780 break;
8781 case kIemNativeLabelType_TlbDone:
8782 pszName = "TlbDone";
8783 fNumbered = true;
8784 break;
8785 case kIemNativeLabelType_Invalid:
8786 case kIemNativeLabelType_End:
8787 break;
8788 }
8789 if (fNumbered)
8790 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8791 else
8792 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8793 continue;
8794 }
8795
8796 case kIemTbDbgEntryType_NativeOffset:
8797 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8798 Assert(offDbgNativeNext > offNative);
8799 break;
8800
8801#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8802 case kIemTbDbgEntryType_DelayedPcUpdate:
8803 pHlp->pfnPrintf(pHlp,
8804 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8805 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8806 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8807 continue;
8808#endif
8809
8810 default:
8811 AssertFailed();
8812 }
8813 iDbgEntry++;
8814 break;
8815 }
8816 }
8817
8818 /*
8819 * Disassemble the next native instruction.
8820 */
8821 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8822# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8823 uint32_t cbInstr = sizeof(paNative[0]);
8824 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8825 if (RT_SUCCESS(rc))
8826 {
8827# if defined(RT_ARCH_AMD64)
8828 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8829 {
8830 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
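                    /* uInfo is built by iemNativeEmitMarker in the recompile loop further down:
                       low word = call index (bit 15 set when recompiled), high word = threaded
                       function number; values that do not decode as a threaded function are
                       treated as plain marker indices into a_apszMarkers. */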
8831 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8832 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8833 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8834 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8835 uInfo & 0x8000 ? "recompiled" : "todo");
8836 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8837 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8838 else
8839 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8840 }
8841 else
8842# endif
8843 {
8844 const char *pszAnnotation = NULL;
8845# ifdef RT_ARCH_AMD64
8846 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8847 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8848 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8849 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8850 PCDISOPPARAM pMemOp;
8851 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8852 pMemOp = &Dis.Param1;
8853 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8854 pMemOp = &Dis.Param2;
8855 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8856 pMemOp = &Dis.Param3;
8857 else
8858 pMemOp = NULL;
8859 if ( pMemOp
8860 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8861 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8862 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8863 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8864
8865#elif defined(RT_ARCH_ARM64)
8866 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8867 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8868 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8869# else
8870# error "Port me"
8871# endif
8872 if (pszAnnotation)
8873 {
8874 static unsigned const s_offAnnotation = 55;
8875 size_t const cchAnnotation = strlen(pszAnnotation);
8876 size_t cchDis = strlen(szDisBuf);
8877 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8878 {
8879 if (cchDis < s_offAnnotation)
8880 {
8881 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8882 cchDis = s_offAnnotation;
8883 }
8884 szDisBuf[cchDis++] = ' ';
8885 szDisBuf[cchDis++] = ';';
8886 szDisBuf[cchDis++] = ' ';
8887 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8888 }
8889 }
8890 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8891 }
8892 }
8893 else
8894 {
8895# if defined(RT_ARCH_AMD64)
8896 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8897 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8898# elif defined(RT_ARCH_ARM64)
8899 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8900# else
8901# error "Port me"
8902# endif
8903 cbInstr = sizeof(paNative[0]);
8904 }
8905 offNative += cbInstr / sizeof(paNative[0]);
8906
8907# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8908 cs_insn *pInstr;
8909 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8910 (uintptr_t)pNativeCur, 1, &pInstr);
8911 if (cInstrs > 0)
8912 {
8913 Assert(cInstrs == 1);
8914 const char *pszAnnotation = NULL;
8915# if defined(RT_ARCH_ARM64)
8916 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8917 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8918 {
8919 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8920 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
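                /* E.g. an op_str of "w9, [x28, #0x123]" (offset made up) yields a pVCpu-relative
                   lookup with offVCpu=0x123, whereas x27-based addresses get biased by the offset
                   of cpum.GstCtx first. */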
8921 char *psz = strchr(pInstr->op_str, '[');
8922 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8923 {
8924 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8925 int32_t off = -1;
8926 psz += 4;
8927 if (*psz == ']')
8928 off = 0;
8929 else if (*psz == ',')
8930 {
8931 psz = RTStrStripL(psz + 1);
8932 if (*psz == '#')
8933 off = RTStrToInt32(&psz[1]);
8934 /** @todo deal with index registers and LSL as well... */
8935 }
8936 if (off >= 0)
8937 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8938 }
8939 }
8940# endif
8941
8942 size_t const cchOp = strlen(pInstr->op_str);
8943# if defined(RT_ARCH_AMD64)
8944 if (pszAnnotation)
8945 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8946 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8947 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8948 else
8949 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8950 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8951
8952# else
8953 if (pszAnnotation)
8954 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8955 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8956 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8957 else
8958 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8959 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8960# endif
8961 offNative += pInstr->size / sizeof(*pNativeCur);
8962 cs_free(pInstr, cInstrs);
8963 }
8964 else
8965 {
8966# if defined(RT_ARCH_AMD64)
8967 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8968 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8969# else
8970 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8971# endif
8972 offNative++;
8973 }
8974# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8975 }
8976 }
8977 else
8978#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8979 {
8980 /*
8981 * No debug info, just disassemble the x86 code and then the native code.
8982 *
8983 * First the guest code:
8984 */
8985 for (unsigned i = 0; i < pTb->cRanges; i++)
8986 {
8987 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8988 + (pTb->aRanges[i].idxPhysPage == 0
8989 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8990 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8991 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8992 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8993 unsigned off = pTb->aRanges[i].offOpcodes;
8994 /** @todo this ain't working when crossing pages! */
8995 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8996 while (off < cbOpcodes)
8997 {
8998 uint32_t cbInstr = 1;
8999 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9000 &pTb->pabOpcodes[off], cbOpcodes - off,
9001 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9002 if (RT_SUCCESS(rc))
9003 {
9004 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9005 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9006 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9007 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9008 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9009 GCPhysPc += cbInstr;
9010 off += cbInstr;
9011 }
9012 else
9013 {
9014 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9015 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9016 break;
9017 }
9018 }
9019 }
9020
9021 /*
9022 * Then the native code:
9023 */
9024 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9025 while (offNative < cNative)
9026 {
9027 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9028# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9029 uint32_t cbInstr = sizeof(paNative[0]);
9030 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9031 if (RT_SUCCESS(rc))
9032 {
9033# if defined(RT_ARCH_AMD64)
9034 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9035 {
9036 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9037 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9038 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9039 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9040 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9041 uInfo & 0x8000 ? "recompiled" : "todo");
9042 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9043 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9044 else
9045 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9046 }
9047 else
9048# endif
9049 {
9050# ifdef RT_ARCH_AMD64
9051 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9052 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9053 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9054 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9055# elif defined(RT_ARCH_ARM64)
9056 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9057 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9058 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9059# else
9060# error "Port me"
9061# endif
9062 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9063 }
9064 }
9065 else
9066 {
9067# if defined(RT_ARCH_AMD64)
9068 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9069 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9070# else
9071 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9072# endif
9073 cbInstr = sizeof(paNative[0]);
9074 }
9075 offNative += cbInstr / sizeof(paNative[0]);
9076
9077# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9078 cs_insn *pInstr;
9079 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9080 (uintptr_t)pNativeCur, 1, &pInstr);
9081 if (cInstrs > 0)
9082 {
9083 Assert(cInstrs == 1);
9084# if defined(RT_ARCH_AMD64)
9085 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9086 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9087# else
9088 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9089 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9090# endif
9091 offNative += pInstr->size / sizeof(*pNativeCur);
9092 cs_free(pInstr, cInstrs);
9093 }
9094 else
9095 {
9096# if defined(RT_ARCH_AMD64)
9097 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9098 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9099# else
9100 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9101# endif
9102 offNative++;
9103 }
9104# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9105 }
9106 }
9107
9108#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9109 /* Cleanup. */
9110 cs_close(&hDisasm);
9111#endif
9112}
9113
9114
9115/**
9116 * Recompiles the given threaded TB into a native one.
9117 *
9118 * In case of failure the translation block will be returned as-is.
9119 *
9120 * @returns pTb.
9121 * @param pVCpu The cross context virtual CPU structure of the calling
9122 * thread.
9123 * @param pTb The threaded translation block to recompile to native.
9124 */
9125DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9126{
9127 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9128
9129 /*
9130 * The first time through, we allocate the recompiler state; on subsequent calls
9131 * we just need to reset it before using it again.
9132 */
9133 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9134 if (RT_LIKELY(pReNative))
9135 iemNativeReInit(pReNative, pTb);
9136 else
9137 {
9138 pReNative = iemNativeInit(pVCpu, pTb);
9139 AssertReturn(pReNative, pTb);
9140 }
9141
9142#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9143 /*
9144 * First do liveness analysis. This is done backwards.
9145 */
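    /* (Liveness is a backwards data-flow problem: what call N must keep alive depends on
       what calls N+1..end consume, hence the final entry is seeded as 'unused' and the
       information is propagated towards the first call.) */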
9146 {
9147 uint32_t idxCall = pTb->Thrd.cCalls;
9148 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9149 { /* likely */ }
9150 else
9151 {
9152 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9153 while (idxCall > cAlloc)
9154 cAlloc *= 2;
9155 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9156 AssertReturn(pvNew, pTb);
9157 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9158 pReNative->cLivenessEntriesAlloc = cAlloc;
9159 }
9160 AssertReturn(idxCall > 0, pTb);
9161 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9162
9163 /* The initial (final) entry. */
9164 idxCall--;
9165 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9166
9167 /* Loop backwards thru the calls and fill in the other entries. */
9168 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9169 while (idxCall > 0)
9170 {
9171 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9172 if (pfnLiveness)
9173 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9174 else
9175 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9176 pCallEntry--;
9177 idxCall--;
9178 }
9179
9180# ifdef VBOX_WITH_STATISTICS
9181 /* Check if there are any EFLAGS optimizations to be had here. This requires someone to set them
9182 to 'clobbered' rather than 'input'. */
9183 /** @todo */
9184# endif
9185 }
9186#endif
9187
9188 /*
9189 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9190 * for aborting if an error happens.
9191 */
9192 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9193#ifdef LOG_ENABLED
9194 uint32_t const cCallsOrg = cCallsLeft;
9195#endif
9196 uint32_t off = 0;
9197 int rc = VINF_SUCCESS;
9198 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9199 {
9200 /*
9201 * Emit prolog code (fixed).
9202 */
9203 off = iemNativeEmitProlog(pReNative, off);
9204
9205 /*
9206 * Convert the calls to native code.
9207 */
9208#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9209 int32_t iGstInstr = -1;
9210#endif
9211#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9212 uint32_t cThreadedCalls = 0;
9213 uint32_t cRecompiledCalls = 0;
9214#endif
9215#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9216 uint32_t idxCurCall = 0;
9217#endif
9218 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9219 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9220 while (cCallsLeft-- > 0)
9221 {
9222 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9223#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9224 pReNative->idxCurCall = idxCurCall;
9225#endif
9226
9227 /*
9228 * Debug info, assembly markup and statistics.
9229 */
9230#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9231 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9232 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9233#endif
9234#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9235 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9236 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9237 {
9238 if (iGstInstr < (int32_t)pTb->cInstructions)
9239 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9240 else
9241 Assert(iGstInstr == pTb->cInstructions);
9242 iGstInstr = pCallEntry->idxInstr;
9243 }
9244 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9245#endif
9246#if defined(VBOX_STRICT)
9247 off = iemNativeEmitMarker(pReNative, off,
9248 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9249#endif
9250#if defined(VBOX_STRICT)
9251 iemNativeRegAssertSanity(pReNative);
9252#endif
9253#ifdef VBOX_WITH_STATISTICS
9254 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9255#endif
9256
9257 /*
9258 * Actual work.
9259 */
9260 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9261 pfnRecom ? "(recompiled)" : "(todo)"));
9262 if (pfnRecom) /** @todo stats on this. */
9263 {
9264 off = pfnRecom(pReNative, off, pCallEntry);
9265 STAM_REL_STATS({cRecompiledCalls++;});
9266 }
9267 else
9268 {
9269 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9270 STAM_REL_STATS({cThreadedCalls++;});
9271 }
9272 Assert(off <= pReNative->cInstrBufAlloc);
9273 Assert(pReNative->cCondDepth == 0);
9274
9275#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9276 if (LogIs2Enabled())
9277 {
9278 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9279# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9280 static const char s_achState[] = "CUXI";
9281# else
9282 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9283# endif
9284
9285 char szGpr[17];
9286 for (unsigned i = 0; i < 16; i++)
9287 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9288 szGpr[16] = '\0';
9289
9290 char szSegBase[X86_SREG_COUNT + 1];
9291 char szSegLimit[X86_SREG_COUNT + 1];
9292 char szSegAttrib[X86_SREG_COUNT + 1];
9293 char szSegSel[X86_SREG_COUNT + 1];
9294 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9295 {
9296 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9297 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9298 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9299 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9300 }
9301 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9302 = szSegSel[X86_SREG_COUNT] = '\0';
9303
9304 char szEFlags[8];
9305 for (unsigned i = 0; i < 7; i++)
9306 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9307 szEFlags[7] = '\0';
9308
9309 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9310 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9311 }
9312#endif
9313
9314 /*
9315 * Advance.
9316 */
9317 pCallEntry++;
9318#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9319 idxCurCall++;
9320#endif
9321 }
9322
9323 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9324 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9325 if (!cThreadedCalls)
9326 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9327
9328 /*
9329 * Emit the epilog code.
9330 */
9331 uint32_t idxReturnLabel;
9332 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9333
9334 /*
9335 * Generate special jump labels.
9336 */
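    /* (Only tail code for label types that were actually requested during recompilation,
       as recorded in the bmLabelTypes bitmap, gets emitted here.) */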
9337 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9338 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9339 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9340 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9341 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
9342 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
9343 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
9344 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
9345 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
9346 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
9347 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
9348 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
9349 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
9350 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
9351 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
9352 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
9353 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
9354 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
9355 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
9356 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
9357 }
9358 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9359 {
9360 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9361 return pTb;
9362 }
9363 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9364 Assert(off <= pReNative->cInstrBufAlloc);
9365
9366 /*
9367 * Make sure all labels have been defined.
9368 */
9369 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9370#ifdef VBOX_STRICT
9371 uint32_t const cLabels = pReNative->cLabels;
9372 for (uint32_t i = 0; i < cLabels; i++)
9373 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9374#endif
9375
9376 /*
9377 * Allocate executable memory, copy over the code we've generated.
9378 */
9379 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9380 if (pTbAllocator->pDelayedFreeHead)
9381 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9382
9383 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9384 AssertReturn(paFinalInstrBuf, pTb);
9385 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9386
9387 /*
9388 * Apply fixups.
9389 */
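    /* Each fixup patches a label-relative field in the final buffer.  As a made-up AMD64
       example: a rel32 field at buffer offset 0x40 referencing a label at offset 0x60,
       recorded with offAddend = -4, resolves to 0x60 - 0x40 - 4 = 0x1c, i.e. the
       displacement ends up relative to the end of the 4-byte field. */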
9390 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9391 uint32_t const cFixups = pReNative->cFixups;
9392 for (uint32_t i = 0; i < cFixups; i++)
9393 {
9394 Assert(paFixups[i].off < off);
9395 Assert(paFixups[i].idxLabel < cLabels);
9396 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9397 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9398 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9399 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9400 switch (paFixups[i].enmType)
9401 {
9402#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9403 case kIemNativeFixupType_Rel32:
9404 Assert(paFixups[i].off + 4 <= off);
9405 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9406 continue;
9407
9408#elif defined(RT_ARCH_ARM64)
9409 case kIemNativeFixupType_RelImm26At0:
9410 {
9411 Assert(paFixups[i].off < off);
9412 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9413 Assert(offDisp >= -262144 && offDisp < 262144);
9414 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9415 continue;
9416 }
9417
9418 case kIemNativeFixupType_RelImm19At5:
9419 {
9420 Assert(paFixups[i].off < off);
9421 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9422 Assert(offDisp >= -262144 && offDisp < 262144);
9423 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9424 continue;
9425 }
9426
9427 case kIemNativeFixupType_RelImm14At5:
9428 {
9429 Assert(paFixups[i].off < off);
9430 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9431 Assert(offDisp >= -8192 && offDisp < 8192);
9432 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9433 continue;
9434 }
9435
9436#endif
9437 case kIemNativeFixupType_Invalid:
9438 case kIemNativeFixupType_End:
9439 break;
9440 }
9441 AssertFailed();
9442 }
9443
9444 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9445 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9446
9447 /*
9448 * Convert the translation block.
9449 */
9450 RTMemFree(pTb->Thrd.paCalls);
9451 pTb->Native.paInstructions = paFinalInstrBuf;
9452 pTb->Native.cInstructions = off;
9453 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9454#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9455 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9456 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9457#endif
9458
9459 Assert(pTbAllocator->cThreadedTbs > 0);
9460 pTbAllocator->cThreadedTbs -= 1;
9461 pTbAllocator->cNativeTbs += 1;
9462 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9463
9464#ifdef LOG_ENABLED
9465 /*
9466 * Disassemble to the log if enabled.
9467 */
9468 if (LogIs3Enabled())
9469 {
9470 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9471 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9472# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9473 RTLogFlush(NULL);
9474# endif
9475 }
9476#endif
9477 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9478
9479 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9480 return pTb;
9481}
9482