VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103836

Last change on this file since 103836 was 103832, checked in by vboxsync, 13 months ago

VMM/IEM: Fix bogus assertion, bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103832 2024-03-13 14:49:29Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation restricts page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
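/* Note: every allocation is thus rounded up to whole 128 byte units, e.g. a 200 byte
 * request takes 2 units (256 bytes) and a 1000 byte request takes 8 units (1024 bytes). */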
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Critical section serializing GDB JIT registrations. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. For
338 * simplicity, the bitmaps for all chunks are allocated as one contiguous
339 * block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
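    /* E.g. with the default 64 MB chunk size and 128 byte units this works out to
     * 524288 units and 8192 bitmap elements (64 KB of bitmap) per chunk. */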
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
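    /* This is a plain first-fit search: starting at the first clear bit, probe the
     * following cReqUnits - 1 bits; on hitting a set bit, resume the scan at the
     * next clear bit after it. */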
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
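        /* E.g. a 200 byte request becomes 256 bytes with the alternative sub-allocator
         * (two 128 byte units) and 224 bytes with the RTHeapSimple one, so that the
         * 32 byte header of the following block ends on the next 64 byte line. */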
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
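/* Worked examples: 4 encodes as 0x04, -8 as 0x78 (0x38 | 0x40), and 200 as the two
 * bytes 0xc8 0x01 (low 7 bits with the continuation flag, then the remaining bits). */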
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
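/* E.g. 0x7f encodes as the single byte 0x7f, while 0x90 becomes the two bytes
 * 0x90 0x01 (low 7 bits with the continuation flag set, then the remaining bit). */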
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
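/* E.g. on AMD64, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits 0x86 0x02
 * (DW_CFA_offset | 6, then ULEB128 2), i.e. RBP is saved at CFA + 2 * -8 = CFA - 16
 * given the data alignment factor of -8 used below. */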
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
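    /* The abEhFrame blob produced below is laid out as [CIE][FDE][terminator]: each
     * record starts with a 32-bit length, is padded to 4 byte alignment with
     * DW_CFA_nop, and the FDE refers back to the CIE via a self-relative offset. */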
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
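        /* E.g. a 256 byte request gets adjusted to RT_ALIGN_32(256 + 32, 64) - 32 = 288
         * bytes, so the 32 byte header of the following block ends exactly on the next
         * 64 byte boundary and that block's user area comes out 64 byte aligned again. */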
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
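    /* E.g. cbMax = 1 GiB yields cbChunk = 64 MiB and cMaxChunks = 16, while
     * cbMax = 128 MiB yields cbChunk = 32 MiB and cMaxChunks = 4. */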
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
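    /* Bookkeeping size sketch for the alt sub-allocator above (illustrative; assumes a 64 MiB
       chunk and IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 8, i.e. 256 byte allocation units):
           cUnitsPerChunk          = 64 MiB / 256  = 262144 units per chunk
           cBitmapElementsPerChunk = 262144 / 64   =   4096 uint64_t bitmap words per chunk
           cbBitmap                = 64 MiB >> 11  =  32 KiB of bitmap per chunk
       All bits start out set (allocated) and are only cleared for a chunk once
       iemExecMemAllocatorGrow has actually mapped that chunk in. */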
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
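
/*
 * Typical call sketch (illustrative only; the real sizes come from the VM configuration and
 * cbChunk = 0 simply requests the default chunk size):
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);
 *      AssertRCReturn(rc, rc);
 *
 * With these values the allocator ends up using 16 MiB chunks with room for four of them.
 */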
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#GP(0).
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#NM.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseDeviceNotAvailableJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise a \#UD.
1607 */
1608IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1609{
1610 iemRaiseUndefinedOpcodeJmp(pVCpu);
1611#ifndef _MSC_VER
1612 return VINF_IEM_RAISED_XCPT; /* not reached */
1613#endif
1614}
1615
1616
1617/**
1618 * Used by TB code when it wants to raise a \#MF.
1619 */
1620IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1621{
1622 iemRaiseMathFaultJmp(pVCpu);
1623#ifndef _MSC_VER
1624 return VINF_IEM_RAISED_XCPT; /* not reached */
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code when it wants to raise a \#XF.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1633{
1634 iemRaiseSimdFpExceptionJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when detecting opcode changes.
1643 * @see iemThreadeFuncWorkerObsoleteTb
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1646{
1647 /* We set fSafeToFree to false because we're being called in the context
1648 of a TB callback function, which for native TBs means we cannot release
1649 the executable memory until we've returned our way back to iemTbExec, as
1650 that return path goes via the native code generated for the TB. */
1651 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1652 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1653 return VINF_IEM_REEXEC_BREAK;
1654}
1655
1656
1657/**
1658 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1661{
1662 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1663 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1664 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1665 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1666 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1667 return VINF_IEM_REEXEC_BREAK;
1668}
1669
1670
1671/**
1672 * Used by TB code when we missed a PC check after a branch.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1675{
1676 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1677 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1678 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1679 pVCpu->iem.s.pbInstrBuf));
1680 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1681 return VINF_IEM_REEXEC_BREAK;
1682}
1683
1684
1685
1686/*********************************************************************************************************************************
1687* Helpers: Segmented memory fetches and stores. *
1688*********************************************************************************************************************************/
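/*
 * Note on the pattern used by all the helpers below: when the corresponding
 * IEMNATIVE_WITH_TLB_LOOKUP_* option is enabled, the recompiled code is expected to do the
 * TLB lookup inline and only call the helper on the slow path, hence the *SafeJmp worker;
 * otherwise the plain *Jmp worker is used, which performs the TLB lookup itself.
 */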
1689
1690/**
1691 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1694{
1695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1696 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1697#else
1698 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1699#endif
1700}
1701
1702
1703/**
1704 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1705 * to 16 bits.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1708{
1709#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1710 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1711#else
1712 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1713#endif
1714}
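/*
 * The cast chain used by the sign-extending helpers is worth spelling out once (illustrative):
 *
 *      uint8_t  const bFetched = 0x80;                                              - byte value -128
 *      uint64_t const uRet     = (uint64_t)(uint16_t)(int16_t)(int8_t)bFetched;     - uRet == 0x000000000000ff80
 *
 * i.e. the value is sign-extended to the requested width (16 bits here) and then
 * zero-extended to fill the 64-bit return register. The 32- and 64-bit variants below
 * follow the same pattern with wider intermediate casts.
 */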
1715
1716
1717/**
1718 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1719 * to 32 bits.
1720 */
1721IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1722{
1723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1724 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1725#else
1726 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1727#endif
1728}
1729
1730/**
1731 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1732 * to 64 bits.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1735{
1736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1737 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1738#else
1739 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1740#endif
1741}
1742
1743
1744/**
1745 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1746 */
1747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1748{
1749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1750 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1751#else
1752 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1753#endif
1754}
1755
1756
1757/**
1758 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1759 * to 32 bits.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1764 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1765#else
1766 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1773 * to 64 bits.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1778 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1779#else
1780 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1791 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1792#else
1793 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1800 * to 64 bits.
1801 */
1802IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1803{
1804#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1805 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1806#else
1807 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1808#endif
1809}
1810
1811
1812/**
1813 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1814 */
1815IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1816{
1817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1818 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1819#else
1820 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1821#endif
1822}
1823
1824
1825/**
1826 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1831 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1832#else
1833 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1844 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1845#else
1846 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1857 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1858#else
1859 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1870 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1871#else
1872 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1873#endif
1874}
1875
1876
1877
1878/**
1879 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1884 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1885#else
1886 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1897 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1898#else
1899 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to store a 32-bit selector value onto a generic stack.
1906 *
1907 * Intel CPUs don't write a whole dword here, hence the special function.
1908 */
1909IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1910{
1911#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1912 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1913#else
1914 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1915#endif
1916}
1917
1918
1919/**
1920 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1921 */
1922IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1923{
1924#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1925 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1926#else
1927 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1928#endif
1929}
1930
1931
1932/**
1933 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1934 */
1935IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1936{
1937#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1938 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1939#else
1940 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1941#endif
1942}
1943
1944
1945/**
1946 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1947 */
1948IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1949{
1950#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1951 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1952#else
1953 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1954#endif
1955}
1956
1957
1958/**
1959 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1960 */
1961IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1962{
1963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1964 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1965#else
1966 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1967#endif
1968}
1969
1970
1971
1972/*********************************************************************************************************************************
1973* Helpers: Flat memory fetches and stores. *
1974*********************************************************************************************************************************/
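/** @note The flat helpers below pass UINT8_MAX as the segment register index to the common
 *        *SafeJmp workers; by convention that tells them to skip applying any segment base
 *        or limit, so the guest linear address is used as-is. */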
1975
1976/**
1977 * Used by TB code to load unsigned 8-bit data w/ flat address.
1978 * @note Zero extending the value to 64-bit to simplify assembly.
1979 */
1980IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1981{
1982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1983 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1984#else
1985 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1986#endif
1987}
1988
1989
1990/**
1991 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1992 * to 16 bits.
1993 * @note Zero extending the value to 64-bit to simplify assembly.
1994 */
1995IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1996{
1997#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1998 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1999#else
2000 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2001#endif
2002}
2003
2004
2005/**
2006 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2007 * to 32 bits.
2008 * @note Zero extending the value to 64-bit to simplify assembly.
2009 */
2010IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2011{
2012#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2013 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2014#else
2015 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2016#endif
2017}
2018
2019
2020/**
2021 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2022 * to 64 bits.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2025{
2026#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2027 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2028#else
2029 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2030#endif
2031}
2032
2033
2034/**
2035 * Used by TB code to load unsigned 16-bit data w/ flat address.
2036 * @note Zero extending the value to 64-bit to simplify assembly.
2037 */
2038IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2039{
2040#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2041 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2042#else
2043 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2044#endif
2045}
2046
2047
2048/**
2049 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2050 * to 32 bits.
2051 * @note Zero extending the value to 64-bit to simplify assembly.
2052 */
2053IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2054{
2055#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2056 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2057#else
2058 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2059#endif
2060}
2061
2062
2063/**
2064 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2065 * to 64 bits.
2066 * @note Zero extending the value to 64-bit to simplify assembly.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to load unsigned 32-bit data w/ flat address.
2080 * @note Zero extending the value to 64-bit to simplify assembly.
2081 */
2082IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2083{
2084#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2085 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2086#else
2087 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2088#endif
2089}
2090
2091
2092/**
2093 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2094 * to 64 bits.
2095 * @note Zero extending the value to 64-bit to simplify assembly.
2096 */
2097IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2098{
2099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2100 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2101#else
2102 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2103#endif
2104}
2105
2106
2107/**
2108 * Used by TB code to load unsigned 64-bit data w/ flat address.
2109 */
2110IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2111{
2112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2113 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2114#else
2115 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2116#endif
2117}
2118
2119
2120/**
2121 * Used by TB code to store unsigned 8-bit data w/ flat address.
2122 */
2123IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2124{
2125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2126 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2127#else
2128 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2129#endif
2130}
2131
2132
2133/**
2134 * Used by TB code to store unsigned 16-bit data w/ flat address.
2135 */
2136IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2137{
2138#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2139 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2140#else
2141 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2142#endif
2143}
2144
2145
2146/**
2147 * Used by TB code to store unsigned 32-bit data w/ flat address.
2148 */
2149IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2150{
2151#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2152 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2153#else
2154 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2155#endif
2156}
2157
2158
2159/**
2160 * Used by TB code to store unsigned 64-bit data w/ flat address.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2165 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2166#else
2167 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2168#endif
2169}
2170
2171
2172
2173/**
2174 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2175 */
2176IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2177{
2178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2179 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2180#else
2181 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2182#endif
2183}
2184
2185
2186/**
2187 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2188 */
2189IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2190{
2191#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2192 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2193#else
2194 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2195#endif
2196}
2197
2198
2199/**
2200 * Used by TB code to store a segment selector value onto a flat stack.
2201 *
2202 * Intel CPUs don't write a whole dword here, hence the special function.
2203 */
2204IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2205{
2206#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2207 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2208#else
2209 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2210#endif
2211}
2212
2213
2214/**
2215 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2216 */
2217IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2218{
2219#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2220 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2221#else
2222 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2223#endif
2224}
2225
2226
2227/**
2228 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2229 */
2230IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2231{
2232#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2233 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2234#else
2235 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2236#endif
2237}
2238
2239
2240/**
2241 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2242 */
2243IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2244{
2245#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2246 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2247#else
2248 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2249#endif
2250}
2251
2252
2253/**
2254 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2255 */
2256IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2257{
2258#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2259 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2260#else
2261 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2262#endif
2263}
2264
2265
2266
2267/*********************************************************************************************************************************
2268* Helpers: Segmented memory mapping. *
2269*********************************************************************************************************************************/
2270
2271/**
2272 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2273 * segmentation.
2274 */
2275IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2276 RTGCPTR GCPtrMem, uint8_t iSegReg))
2277{
2278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2279 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2280#else
2281 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2282#endif
2283}
2284
2285
2286/**
2287 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2288 */
2289IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2290 RTGCPTR GCPtrMem, uint8_t iSegReg))
2291{
2292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2293 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2294#else
2295 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2296#endif
2297}
2298
2299
2300/**
2301 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2302 */
2303IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2304 RTGCPTR GCPtrMem, uint8_t iSegReg))
2305{
2306#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2307 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2308#else
2309 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2310#endif
2311}
2312
2313
2314/**
2315 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2316 */
2317IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2318 RTGCPTR GCPtrMem, uint8_t iSegReg))
2319{
2320#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2321 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2322#else
2323 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2324#endif
2325}
2326
2327
2328/**
2329 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2330 * segmentation.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2333 RTGCPTR GCPtrMem, uint8_t iSegReg))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2336 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#else
2338 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2345 */
2346IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2347 RTGCPTR GCPtrMem, uint8_t iSegReg))
2348{
2349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2350 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2351#else
2352 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2353#endif
2354}
2355
2356
2357/**
2358 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2361 RTGCPTR GCPtrMem, uint8_t iSegReg))
2362{
2363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2364 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2365#else
2366 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2367#endif
2368}
2369
2370
2371/**
2372 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2373 */
2374IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2375 RTGCPTR GCPtrMem, uint8_t iSegReg))
2376{
2377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2378 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2379#else
2380 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2381#endif
2382}
2383
2384
2385/**
2386 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2387 * segmentation.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2390 RTGCPTR GCPtrMem, uint8_t iSegReg))
2391{
2392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2393 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2394#else
2395 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2396#endif
2397}
2398
2399
2400/**
2401 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2402 */
2403IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2404 RTGCPTR GCPtrMem, uint8_t iSegReg))
2405{
2406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2407 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2408#else
2409 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2410#endif
2411}
2412
2413
2414/**
2415 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2416 */
2417IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2418 RTGCPTR GCPtrMem, uint8_t iSegReg))
2419{
2420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2421 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2422#else
2423 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2424#endif
2425}
2426
2427
2428/**
2429 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2430 */
2431IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2432 RTGCPTR GCPtrMem, uint8_t iSegReg))
2433{
2434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2435 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2436#else
2437 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2438#endif
2439}
2440
2441
2442/**
2443 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2444 * segmentation.
2445 */
2446IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2447 RTGCPTR GCPtrMem, uint8_t iSegReg))
2448{
2449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2450 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2451#else
2452 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2453#endif
2454}
2455
2456
2457/**
2458 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2459 */
2460IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2461 RTGCPTR GCPtrMem, uint8_t iSegReg))
2462{
2463#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2464 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#else
2466 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#endif
2468}
2469
2470
2471/**
2472 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2475 RTGCPTR GCPtrMem, uint8_t iSegReg))
2476{
2477#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2478 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#else
2480 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#endif
2482}
2483
2484
2485/**
2486 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2487 */
2488IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2489 RTGCPTR GCPtrMem, uint8_t iSegReg))
2490{
2491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2492 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2493#else
2494 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2495#endif
2496}
2497
2498
2499/**
2500 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2501 */
2502IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2503 RTGCPTR GCPtrMem, uint8_t iSegReg))
2504{
2505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2506 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2507#else
2508 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2509#endif
2510}
2511
2512
2513/**
2514 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2515 */
2516IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2517 RTGCPTR GCPtrMem, uint8_t iSegReg))
2518{
2519#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2520 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2521#else
2522 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2523#endif
2524}
2525
2526
2527/**
2528 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2529 * segmentation.
2530 */
2531IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2532 RTGCPTR GCPtrMem, uint8_t iSegReg))
2533{
2534#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2535 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#else
2537 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#endif
2539}
2540
2541
2542/**
2543 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2544 */
2545IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2546 RTGCPTR GCPtrMem, uint8_t iSegReg))
2547{
2548#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2549 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2550#else
2551 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2552#endif
2553}
2554
2555
2556/**
2557 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2558 */
2559IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2560 RTGCPTR GCPtrMem, uint8_t iSegReg))
2561{
2562#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2563 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2564#else
2565 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2566#endif
2567}
2568
2569
2570/**
2571 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2572 */
2573IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2574 RTGCPTR GCPtrMem, uint8_t iSegReg))
2575{
2576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2577 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2578#else
2579 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2580#endif
2581}
2582
2583
2584/*********************************************************************************************************************************
2585* Helpers: Flat memory mapping. *
2586*********************************************************************************************************************************/
2587
2588/**
2589 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2590 * address.
2591 */
2592IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2593{
2594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2595 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2596#else
2597 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2598#endif
2599}
2600
2601
2602/**
2603 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2604 */
2605IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2606{
2607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2608 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2609#else
2610 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2611#endif
2612}
2613
2614
2615/**
2616 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2617 */
2618IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2619{
2620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2621 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2622#else
2623 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2624#endif
2625}
2626
2627
2628/**
2629 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2630 */
2631IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2632{
2633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2634 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2635#else
2636 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2637#endif
2638}
2639
2640
2641/**
2642 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2643 * address.
2644 */
2645IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2646{
2647#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2648 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2649#else
2650 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2651#endif
2652}
2653
2654
2655/**
2656 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2657 */
2658IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2659{
2660#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2661 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2662#else
2663 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2664#endif
2665}
2666
2667
2668/**
2669 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2670 */
2671IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2672{
2673#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2674 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2675#else
2676 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2677#endif
2678}
2679
2680
2681/**
2682 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2683 */
2684IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2685{
2686#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2687 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2688#else
2689 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2690#endif
2691}
2692
2693
2694/**
2695 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2696 * address.
2697 */
2698IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2699{
2700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2701 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2702#else
2703 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2704#endif
2705}
2706
2707
2708/**
2709 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2710 */
2711IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2715#else
2716 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2725{
2726#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2727 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2728#else
2729 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2730#endif
2731}
2732
2733
2734/**
2735 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2736 */
2737IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2738{
2739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2740 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2741#else
2742 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2743#endif
2744}
2745
2746
2747/**
2748 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2749 * address.
2750 */
2751IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2752{
2753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2754 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2755#else
2756 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2757#endif
2758}
2759
2760
2761/**
2762 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2763 */
2764IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2765{
2766#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2767 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2768#else
2769 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2770#endif
2771}
2772
2773
2774/**
2775 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2776 */
2777IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2778{
2779#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2780 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2781#else
2782 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2783#endif
2784}
2785
2786
2787/**
2788 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2789 */
2790IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2791{
2792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2793 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2794#else
2795 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2796#endif
2797}
2798
2799
2800/**
2801 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2802 */
2803IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2804{
2805#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2806 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2807#else
2808 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2809#endif
2810}
2811
2812
2813/**
2814 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2815 */
2816IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2817{
2818#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2819 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2820#else
2821 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2822#endif
2823}
2824
2825
2826/**
2827 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2828 * address.
2829 */
2830IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2831{
2832#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2833 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2834#else
2835 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2836#endif
2837}
2838
2839
2840/**
2841 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2842 */
2843IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2844{
2845#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2846 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2847#else
2848 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2849#endif
2850}
2851
2852
2853/**
2854 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2855 */
2856IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2857{
2858#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2859 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2860#else
2861 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2862#endif
2863}
2864
2865
2866/**
2867 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2868 */
2869IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2870{
2871#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2872 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2873#else
2874 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2875#endif
2876}
2877
2878
2879/*********************************************************************************************************************************
2880* Helpers: Commit, rollback & unmap *
2881*********************************************************************************************************************************/
2882
2883/**
2884 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2885 */
2886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2887{
2888 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2889}
2890
2891
2892/**
2893 * Used by TB code to commit and unmap a read-write memory mapping.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2896{
2897 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2898}
2899
2900
2901/**
2902 * Used by TB code to commit and unmap a write-only memory mapping.
2903 */
2904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2905{
2906 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2907}
2908
2909
2910/**
2911 * Used by TB code to commit and unmap a read-only memory mapping.
2912 */
2913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2914{
2915 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2916}
2917
2918
2919/**
2920 * Reinitializes the native recompiler state.
2921 *
2922 * Called before starting a new recompile job.
2923 */
2924static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2925{
2926 pReNative->cLabels = 0;
2927 pReNative->bmLabelTypes = 0;
2928 pReNative->cFixups = 0;
2929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2930 pReNative->pDbgInfo->cEntries = 0;
2931#endif
2932 pReNative->pTbOrg = pTb;
2933 pReNative->cCondDepth = 0;
2934 pReNative->uCondSeqNo = 0;
2935 pReNative->uCheckIrqSeqNo = 0;
2936 pReNative->uTlbSeqNo = 0;
2937
2938#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2939 pReNative->Core.offPc = 0;
2940 pReNative->Core.cInstrPcUpdateSkipped = 0;
2941#endif
2942 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2943#if IEMNATIVE_HST_GREG_COUNT < 32
2944 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2945#endif
2946 ;
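    /* Illustrative effect of the above (assuming 16 allocatable host GPRs): bits 0..15 are
       only set for the registers in IEMNATIVE_REG_FIXED_MASK, while the ~(RT_BIT(16) - 1)
       term sets bits 16..31 so the allocator can never hand out a register index that does
       not exist on the host. */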
2947 pReNative->Core.bmHstRegsWithGstShadow = 0;
2948 pReNative->Core.bmGstRegShadows = 0;
2949 pReNative->Core.bmVars = 0;
2950 pReNative->Core.bmStack = 0;
2951 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2952 pReNative->Core.u64ArgVars = UINT64_MAX;
2953
2954 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2955 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2956 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2957 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2958 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2959 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2960 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2961 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2962 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2963 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2964 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2965 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2968
2969 /* Full host register reinit: */
2970 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2971 {
2972 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2973 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2974 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2975 }
2976
2977 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2978 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2979#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2980 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2981#endif
2982#ifdef IEMNATIVE_REG_FIXED_TMP0
2983 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2984#endif
2985#ifdef IEMNATIVE_REG_FIXED_TMP1
2986 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2987#endif
2988#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2989 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2990#endif
2991 );
2992 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2993 {
2994 fRegs &= ~RT_BIT_32(idxReg);
2995 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2996 }
2997
2998 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2999#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3000 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3001#endif
3002#ifdef IEMNATIVE_REG_FIXED_TMP0
3003 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3004#endif
3005#ifdef IEMNATIVE_REG_FIXED_TMP1
3006 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3007#endif
3008#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3009 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3010#endif
3011
3012#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3013# ifdef RT_ARCH_ARM64
3014 /*
3015 * Arm64 only has 32 128-bit registers; in order to support emulating 256-bit registers we statically pair
3016 * two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3017 * We always pair v0 with v1, v2 with v3, etc., so we mark each higher (odd-numbered) register as fixed here
3018 * during init (the 0xaaaaaaaa mask below) and the register allocator assumes it is always free when the lower one is picked.
3019 */
3020 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3021# else
3022 uint32_t const fFixedAdditional = 0;
3023# endif
3024
3025 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3026 | fFixedAdditional
3027# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3028 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3029# endif
3030 ;
3031 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3032 pReNative->Core.bmGstSimdRegShadows = 0;
3033 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3034 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3035
3036 /* Full host register reinit: */
3037 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3038 {
3039 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3040 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3041 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3042 }
3043
3044 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3045 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3046 {
3047 fRegs &= ~RT_BIT_32(idxReg);
3048 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3049 }
3050
3051#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3052 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3053#endif
3054
3055#endif
3056
3057 return pReNative;
3058}
3059
3060
3061/**
3062 * Allocates and initializes the native recompiler state.
3063 *
3064 * This is called the first time an EMT wants to recompile something.
3065 *
3066 * @returns Pointer to the new recompiler state.
3067 * @param pVCpu The cross context virtual CPU structure of the calling
3068 * thread.
3069 * @param pTb The TB that's about to be recompiled.
3070 * @thread EMT(pVCpu)
3071 */
3072static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3073{
3074 VMCPU_ASSERT_EMT(pVCpu);
3075
3076 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3077 AssertReturn(pReNative, NULL);
3078
3079 /*
3080 * Try allocate all the buffers and stuff we need.
3081 */
3082 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3083 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3084 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3086 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3087#endif
3088 if (RT_LIKELY( pReNative->pInstrBuf
3089 && pReNative->paLabels
3090 && pReNative->paFixups)
3091#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3092 && pReNative->pDbgInfo
3093#endif
3094 )
3095 {
3096 /*
3097 * Set the buffer & array sizes on success.
3098 */
3099 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3100 pReNative->cLabelsAlloc = _8K;
3101 pReNative->cFixupsAlloc = _16K;
3102#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3103 pReNative->cDbgInfoAlloc = _16K;
3104#endif
3105
3106 /* Other constant stuff: */
3107 pReNative->pVCpu = pVCpu;
3108
3109 /*
3110 * Done, just need to save it and reinit it.
3111 */
3112 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3113 return iemNativeReInit(pReNative, pTb);
3114 }
3115
3116 /*
3117 * Failed. Cleanup and return.
3118 */
3119 AssertFailed();
3120 RTMemFree(pReNative->pInstrBuf);
3121 RTMemFree(pReNative->paLabels);
3122 RTMemFree(pReNative->paFixups);
3123#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3124 RTMemFree(pReNative->pDbgInfo);
3125#endif
3126 RTMemFree(pReNative);
3127 return NULL;
3128}
3129
3130
3131/**
3132 * Creates a label
3133 *
3134 * If the label does not yet have a defined position,
3135 * call iemNativeLabelDefine() later to set it.
3136 *
3137 * @returns Label ID. Throws VBox status code on failure, so no need to check
3138 * the return value.
3139 * @param pReNative The native recompile state.
3140 * @param enmType The label type.
3141 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3142 * label is not yet defined (default).
3143 * @param uData Data associated with the label. Only applicable to
3144 * certain types of labels. Default is zero.
3145 */
3146DECL_HIDDEN_THROW(uint32_t)
3147iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3148 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3149{
3150 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3151
3152 /*
3153 * Locate existing label definition.
3154 *
3155 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3156 * and uData is zero.
3157 */
3158 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3159 uint32_t const cLabels = pReNative->cLabels;
3160 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3161#ifndef VBOX_STRICT
3162 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3163 && offWhere == UINT32_MAX
3164 && uData == 0
3165#endif
3166 )
3167 {
3168#ifndef VBOX_STRICT
3169 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3170 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3171 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3172 if (idxLabel < pReNative->cLabels)
3173 return idxLabel;
3174#else
3175 for (uint32_t i = 0; i < cLabels; i++)
3176 if ( paLabels[i].enmType == enmType
3177 && paLabels[i].uData == uData)
3178 {
3179 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3180 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3182 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3183 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3184 return i;
3185 }
3186 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3187 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3188#endif
3189 }
3190
3191 /*
3192 * Make sure we've got room for another label.
3193 */
3194 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3195 { /* likely */ }
3196 else
3197 {
3198 uint32_t cNew = pReNative->cLabelsAlloc;
3199 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3200 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3201 cNew *= 2;
3202 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3203 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3204 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3205 pReNative->paLabels = paLabels;
3206 pReNative->cLabelsAlloc = cNew;
3207 }
3208
3209 /*
3210 * Define a new label.
3211 */
3212 paLabels[cLabels].off = offWhere;
3213 paLabels[cLabels].enmType = enmType;
3214 paLabels[cLabels].uData = uData;
3215 pReNative->cLabels = cLabels + 1;
3216
3217 Assert((unsigned)enmType < 64);
3218 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3219
3220 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3221 {
3222 Assert(uData == 0);
3223 pReNative->aidxUniqueLabels[enmType] = cLabels;
3224 }
3225
3226 if (offWhere != UINT32_MAX)
3227 {
3228#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3229 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3230 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3231#endif
3232 }
3233 return cLabels;
3234}
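/*
 * Illustrative sketch of the forward-label pattern (not taken from this file; the
 * label type used below is a placeholder/assumption):
 *
 *      // Create the label up front without a position...
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *      // ...emit code that branches to it via a fixup (see iemNativeAddFixup below)...
 *      // ...and bind it to the current native offset once we get there:
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */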
3235
3236
3237/**
3238 * Defines the location of an existing label.
3239 *
3240 * @param pReNative The native recompile state.
3241 * @param idxLabel The label to define.
3242 * @param offWhere The position.
3243 */
3244DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3245{
3246 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3247 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3248 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3249 pLabel->off = offWhere;
3250#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3251 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3252 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3253#endif
3254}
3255
3256
3257/**
3258 * Looks up a label.
3259 *
3260 * @returns Label ID if found, UINT32_MAX if not.
3261 */
3262static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3263 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3264{
3265 Assert((unsigned)enmType < 64);
3266 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3267 {
3268 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3269 return pReNative->aidxUniqueLabels[enmType];
3270
3271 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3272 uint32_t const cLabels = pReNative->cLabels;
3273 for (uint32_t i = 0; i < cLabels; i++)
3274 if ( paLabels[i].enmType == enmType
3275 && paLabels[i].uData == uData
3276 && ( paLabels[i].off == offWhere
3277 || offWhere == UINT32_MAX
3278 || paLabels[i].off == UINT32_MAX))
3279 return i;
3280 }
3281 return UINT32_MAX;
3282}
3283
3284
3285/**
3286 * Adds a fixup.
3287 *
3288 * @throws VBox status code (int) on failure.
3289 * @param pReNative The native recompile state.
3290 * @param offWhere The instruction offset of the fixup location.
3291 * @param idxLabel The target label ID for the fixup.
3292 * @param enmType The fixup type.
3293 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3294 */
3295DECL_HIDDEN_THROW(void)
3296iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3297 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3298{
3299 Assert(idxLabel <= UINT16_MAX);
3300 Assert((unsigned)enmType <= UINT8_MAX);
3301
3302 /*
3303 * Make sure we've got room.
3304 */
3305 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3306 uint32_t const cFixups = pReNative->cFixups;
3307 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3308 { /* likely */ }
3309 else
3310 {
3311 uint32_t cNew = pReNative->cFixupsAlloc;
3312 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3313 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3314 cNew *= 2;
3315 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3316 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3317 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3318 pReNative->paFixups = paFixups;
3319 pReNative->cFixupsAlloc = cNew;
3320 }
3321
3322 /*
3323 * Add the fixup.
3324 */
3325 paFixups[cFixups].off = offWhere;
3326 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3327 paFixups[cFixups].enmType = enmType;
3328 paFixups[cFixups].offAddend = offAddend;
3329 pReNative->cFixups = cFixups + 1;
3330}
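/*
 * Illustrative sketch of how a fixup ties a branch to a label (the fixup type named
 * below is a placeholder/assumption, not necessarily one of the real ones):
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *      // Record where the branch instruction lives so it can be patched later...
 *      iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_SomeRelBranch);
 *      // ...emit the (as yet unresolved) branch instruction itself; a later pass
 *      // patches it once the label offset is known.
 */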
3331
3332
3333/**
3334 * Slow code path for iemNativeInstrBufEnsure.
3335 */
3336DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3337{
3338 /* Double the buffer size till we meet the request. */
3339 uint32_t cNew = pReNative->cInstrBufAlloc;
3340 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3341 do
3342 cNew *= 2;
3343 while (cNew < off + cInstrReq);
3344
3345 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3346#ifdef RT_ARCH_ARM64
3347 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3348#else
3349 uint32_t const cbMaxInstrBuf = _2M;
3350#endif
3351 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3352
3353 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3354 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3355
3356#ifdef VBOX_STRICT
3357 pReNative->offInstrBufChecked = off + cInstrReq;
3358#endif
3359 pReNative->cInstrBufAlloc = cNew;
3360 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3361}
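/*
 * The fast-path wrapper (iemNativeInstrBufEnsure, not defined in this section) is what
 * the emitters call; a typical emitter prologue looks roughly like this sketch, where
 * the wrapper's exact signature is assumed:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
 *      pCodeBuf[off++] = ...;   // native instruction bytes (AMD64) or words (ARM64)
 */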
3362
3363#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3364
3365/**
3366 * Grows the static debug info array used during recompilation.
3367 *
3368 * @returns Pointer to the new debug info block; throws VBox status code on
3369 * failure, so no need to check the return value.
3370 */
3371DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3372{
3373 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3374 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3375 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3376 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3377 pReNative->pDbgInfo = pDbgInfo;
3378 pReNative->cDbgInfoAlloc = cNew;
3379 return pDbgInfo;
3380}
3381
3382
3383/**
3384 * Adds a new, uninitialized debug info entry, returning the pointer to it.
3385 */
3386DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3387{
3388 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3389 { /* likely */ }
3390 else
3391 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3392 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3393}
3394
3395
3396/**
3397 * Debug Info: Adds a native offset record, if necessary.
3398 */
3399DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3400{
3401 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3402
3403 /*
3404 * Search backwards to see if we've got a similar record already.
3405 */
3406 uint32_t idx = pDbgInfo->cEntries;
3407 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3408 while (idx-- > idxStop)
3409 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3410 {
3411 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3412 return;
3413 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3414 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3415 break;
3416 }
3417
3418 /*
3419 * Add it.
3420 */
3421 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3422 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3423 pEntry->NativeOffset.offNative = off;
3424}
3425
3426
3427/**
3428 * Debug Info: Record info about a label.
3429 */
3430static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3431{
3432 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3433 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3434 pEntry->Label.uUnused = 0;
3435 pEntry->Label.enmLabel = (uint8_t)enmType;
3436 pEntry->Label.uData = uData;
3437}
3438
3439
3440/**
3441 * Debug Info: Record info about a threaded call.
3442 */
3443static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3444{
3445 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3446 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3447 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3448 pEntry->ThreadedCall.uUnused = 0;
3449 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3450}
3451
3452
3453/**
3454 * Debug Info: Record info about a new guest instruction.
3455 */
3456static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3457{
3458 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3459 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3460 pEntry->GuestInstruction.uUnused = 0;
3461 pEntry->GuestInstruction.fExec = fExec;
3462}
3463
3464
3465/**
3466 * Debug Info: Record info about guest register shadowing.
3467 */
3468DECL_HIDDEN_THROW(void)
3469iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3470 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3471{
3472 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3473 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3474 pEntry->GuestRegShadowing.uUnused = 0;
3475 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3476 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3477 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3478}
3479
3480
3481# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3482/**
3483 * Debug Info: Record info about guest SIMD register shadowing.
3484 */
3485DECL_HIDDEN_THROW(void)
3486iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3487 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3488{
3489 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3490 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3491 pEntry->GuestSimdRegShadowing.uUnused = 0;
3492 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3493 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3494 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3495}
3496# endif
3497
3498
3499# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3500/**
3501 * Debug Info: Record info about delayed RIP updates.
3502 */
3503DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3504{
3505 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3506 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3507 pEntry->DelayedPcUpdate.offPc = offPc;
3508 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3509}
3510# endif
3511
3512#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3513
3514
3515/*********************************************************************************************************************************
3516* Register Allocator *
3517*********************************************************************************************************************************/
3518
3519/**
3520 * Register parameter indexes (indexed by argument number).
3521 */
3522DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3523{
3524 IEMNATIVE_CALL_ARG0_GREG,
3525 IEMNATIVE_CALL_ARG1_GREG,
3526 IEMNATIVE_CALL_ARG2_GREG,
3527 IEMNATIVE_CALL_ARG3_GREG,
3528#if defined(IEMNATIVE_CALL_ARG4_GREG)
3529 IEMNATIVE_CALL_ARG4_GREG,
3530# if defined(IEMNATIVE_CALL_ARG5_GREG)
3531 IEMNATIVE_CALL_ARG5_GREG,
3532# if defined(IEMNATIVE_CALL_ARG6_GREG)
3533 IEMNATIVE_CALL_ARG6_GREG,
3534# if defined(IEMNATIVE_CALL_ARG7_GREG)
3535 IEMNATIVE_CALL_ARG7_GREG,
3536# endif
3537# endif
3538# endif
3539#endif
3540};
3541AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3542
3543/**
3544 * Call register masks indexed by argument count.
3545 */
3546DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3547{
3548 0,
3549 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3550 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3551 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3552 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3553 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3554#if defined(IEMNATIVE_CALL_ARG4_GREG)
3555 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3556 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3557# if defined(IEMNATIVE_CALL_ARG5_GREG)
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3559 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3560# if defined(IEMNATIVE_CALL_ARG6_GREG)
3561 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3562 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3564# if defined(IEMNATIVE_CALL_ARG7_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3567 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3568# endif
3569# endif
3570# endif
3571#endif
3572};
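/*
 * Example: g_afIemNativeCallRegs[cArgs] is the mask of the first cArgs argument
 * registers, so a quick "are they all free and unshadowed?" check looks like this:
 *
 *      if (!((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]))
 *          ... all argument registers are unused and not shadowing anything ...
 */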
3573
3574#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3575/**
3576 * BP offset of the stack argument slots.
3577 *
3578 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3579 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3580 */
3581DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3582{
3583 IEMNATIVE_FP_OFF_STACK_ARG0,
3584# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3585 IEMNATIVE_FP_OFF_STACK_ARG1,
3586# endif
3587# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3588 IEMNATIVE_FP_OFF_STACK_ARG2,
3589# endif
3590# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3591 IEMNATIVE_FP_OFF_STACK_ARG3,
3592# endif
3593};
3594AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3595#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3596
3597/**
3598 * Info about shadowed guest register values.
3599 * @see IEMNATIVEGSTREG
3600 */
3601DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3602{
3603#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3604 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3605 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3606 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3607 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3608 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3609 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3610 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3611 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3612 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3613 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3614 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3615 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3616 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3617 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3618 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3620 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3621 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3622 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3623 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3624 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3625 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3626 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3627 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3628 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3629 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3630 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3631 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3632 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3633 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3634 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3635 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3636 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3637 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3638 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3639 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3640 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3641 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3642 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3643 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3644 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3645 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3646 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3647 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3648 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3649 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3650 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3651 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3652#undef CPUMCTX_OFF_AND_SIZE
3653};
3654AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
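/*
 * Each entry above thus carries the CPUMCTX byte offset of the member, its size and a
 * short name. E.g. the rax entry amounts to (sketch; the structure field names are
 * assumed from how the table is used, only .cb and .pszName appear in this file):
 *      { RT_UOFFSETOF(VMCPU, cpum.GstCtx.rax), sizeof(uint64_t), "rax" }
 */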
3655
3656
3657/** Host CPU general purpose register names. */
3658DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3659{
3660#ifdef RT_ARCH_AMD64
3661 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3662#elif defined(RT_ARCH_ARM64)
3663 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3664 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3665#else
3666# error "port me"
3667#endif
3668};
3669
3670
3671#if 0 /* unused */
3672/**
3673 * Tries to locate a suitable register in the given register mask.
3674 *
3675 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3676 * failed.
3677 *
3678 * @returns Host register number on success, returns UINT8_MAX on failure.
3679 */
3680static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3681{
3682 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3683 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3684 if (fRegs)
3685 {
3686 /** @todo pick better here: */
3687 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3688
3689 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3690 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3691 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3692 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3693
3694 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3695 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3696 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3697 return idxReg;
3698 }
3699 return UINT8_MAX;
3700}
3701#endif /* unused */
3702
3703
3704/**
3705 * Locate a register, possibly freeing one up.
3706 *
3707 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3708 * failed.
3709 *
3710 * @returns Host register number on success. Returns UINT8_MAX if no register is
3711 * found; the caller is supposed to deal with this and raise an
3712 * allocation type specific status code (if desired).
3713 *
3714 * @throws VBox status code if we run into trouble spilling a variable or
3715 * recording debug info. Does NOT throw anything if we're out of
3716 * registers, though.
3717 */
3718static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3719 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3720{
3721 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3722 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3723 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3724
3725 /*
3726 * Try a freed register that's shadowing a guest register.
3727 */
3728 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3729 if (fRegs)
3730 {
3731 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3732
3733#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3734 /*
3735 * When we have liveness information, we use it to kick out all shadowed
3736 * guest registers that will not be needed any more in this TB. If we're
3737 * lucky, this may prevent us from ending up here again.
3738 *
3739 * Note! We must consider the previous entry here so we don't free
3740 * anything that the current threaded function requires (current
3741 * entry is produced by the next threaded function).
3742 */
3743 uint32_t const idxCurCall = pReNative->idxCurCall;
3744 if (idxCurCall > 0)
3745 {
3746 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3747
3748# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3749 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3750 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3751 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3752# else
3753 /* Construct a mask of the registers not in the read or write state.
3754 Note! We could skip writes, if they aren't from us, as this is just
3755 a hack to prevent trashing registers that have just been written
3756 or will be written when we retire the current instruction. */
3757 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3758 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3759 & IEMLIVENESSBIT_MASK;
3760# endif
3761 /* Merge EFLAGS. */
3762 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3763 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3764 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3765 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3766 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3767
3768 /* If it matches any shadowed registers. */
3769 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3770 {
3771 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3772 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3773 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3774
3775 /* See if we've got any unshadowed registers we can return now. */
3776 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3777 if (fUnshadowedRegs)
3778 {
3779 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3780 return (fPreferVolatile
3781 ? ASMBitFirstSetU32(fUnshadowedRegs)
3782 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3783 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3784 - 1;
3785 }
3786 }
3787 }
3788#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3789
3790 unsigned const idxReg = (fPreferVolatile
3791 ? ASMBitFirstSetU32(fRegs)
3792 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3793 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3794 - 1;
3795
3796 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3797 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3798 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3799 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3800
3801 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3802 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3803 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3804 return idxReg;
3805 }
3806
3807 /*
3808 * Try to free up a variable that's in a register.
3809 *
3810 * We do two rounds here: first we evacuate variables that don't need to be
3811 * saved on the stack, then in the second round we move things to the stack.
3812 */
3813 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3814 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3815 {
3816 uint32_t fVars = pReNative->Core.bmVars;
3817 while (fVars)
3818 {
3819 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3820 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3821 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3822 && (RT_BIT_32(idxReg) & fRegMask)
3823 && ( iLoop == 0
3824 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3825 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3826 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3827 {
3828 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3829 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3830 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3831 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3832 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3833 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3834
3835 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3836 {
3837 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3838 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3839 }
3840
3841 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3842 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3843
3844 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3845 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3846 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3847 return idxReg;
3848 }
3849 fVars &= ~RT_BIT_32(idxVar);
3850 }
3851 }
3852
3853 return UINT8_MAX;
3854}
3855
3856
3857/**
3858 * Reassigns a variable to a different register specified by the caller.
3859 *
3860 * @returns The new code buffer position.
3861 * @param pReNative The native recompile state.
3862 * @param off The current code buffer position.
3863 * @param idxVar The variable index.
3864 * @param idxRegOld The old host register number.
3865 * @param idxRegNew The new host register number.
3866 * @param pszCaller The caller for logging.
3867 */
3868static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3869 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3870{
3871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3872 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3873 RT_NOREF(pszCaller);
3874
3875 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3876
3877 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3878 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3879 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3880 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3881
3882 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3883 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3884 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3885 if (fGstRegShadows)
3886 {
3887 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3888 | RT_BIT_32(idxRegNew);
3889 while (fGstRegShadows)
3890 {
3891 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3892 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3893
3894 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3895 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3896 }
3897 }
3898
3899 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3900 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3901 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3902 return off;
3903}
3904
3905
3906/**
3907 * Moves a variable to a different register or spills it onto the stack.
3908 *
3909 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3910 * kinds can easily be recreated if needed later.
3911 *
3912 * @returns The new code buffer position.
3913 * @param pReNative The native recompile state.
3914 * @param off The current code buffer position.
3915 * @param idxVar The variable index.
3916 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3917 * call-volatile registers.
3918 */
3919DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3920 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3921{
3922 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3923 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3924 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3925 Assert(!pVar->fRegAcquired);
3926
3927 uint8_t const idxRegOld = pVar->idxReg;
3928 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3929 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3930 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3931 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3932 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3933 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3934 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3935 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3936
3937
3938 /** @todo Add statistics on this.*/
3939 /** @todo Implement basic variable liveness analysis (python) so variables
3940 * can be freed immediately once no longer used. Without this we risk
3941 * trashing registers and stack space on dead variables.
3942 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3943
3944 /*
3945 * First try to move it to a different register, as that's cheaper.
3946 */
3947 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3948 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3949 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3950 if (fRegs)
3951 {
3952 /* Avoid using shadow registers, if possible. */
3953 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3954 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3955 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3956 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3957 }
3958
3959 /*
3960 * Otherwise we must spill the register onto the stack.
3961 */
3962 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3963 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3964 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3965 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3966
3967 pVar->idxReg = UINT8_MAX;
3968 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3969 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3970 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3971 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3972 return off;
3973}
3974
3975
3976/**
3977 * Allocates a temporary host general purpose register.
3978 *
3979 * This may emit code to save register content onto the stack in order to free
3980 * up a register.
3981 *
3982 * @returns The host register number; throws VBox status code on failure,
3983 * so no need to check the return value.
3984 * @param pReNative The native recompile state.
3985 * @param poff Pointer to the variable with the code buffer position.
3986 * This will be updated if we need to move a variable from
3987 * register to stack in order to satisfy the request.
3988 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3989 * registers (@c true, default) or the other way around
3990 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3991 */
3992DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3993{
3994 /*
3995 * Try to find a completely unused register, preferably a call-volatile one.
3996 */
3997 uint8_t idxReg;
3998 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3999 & ~pReNative->Core.bmHstRegsWithGstShadow
4000 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4001 if (fRegs)
4002 {
4003 if (fPreferVolatile)
4004 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4005 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4006 else
4007 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4008 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4009 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4010 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4011 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4012 }
4013 else
4014 {
4015 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4016 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4017 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4018 }
4019 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4020}
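/*
 * Minimal usage sketch (illustrative; the release helper named here is assumed, it is
 * not defined in this part of the file):
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
 *      // ... use idxTmpReg as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);   // assumed counterpart of the alloc
 */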
4021
4022
4023/**
4024 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
4025 * registers.
4026 *
4027 * @returns The host register number; throws VBox status code on failure,
4028 * so no need to check the return value.
4029 * @param pReNative The native recompile state.
4030 * @param poff Pointer to the variable with the code buffer position.
4031 * This will be updated if we need to move a variable from
4032 * register to stack in order to satisfy the request.
4033 * @param fRegMask Mask of acceptable registers.
4034 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4035 * registers (@c true, default) or the other way around
4036 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4037 */
4038DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4039 bool fPreferVolatile /*= true*/)
4040{
4041 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4042 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4043
4044 /*
4045 * Try to find a completely unused register, preferably a call-volatile one.
4046 */
4047 uint8_t idxReg;
4048 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4049 & ~pReNative->Core.bmHstRegsWithGstShadow
4050 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4051 & fRegMask;
4052 if (fRegs)
4053 {
4054 if (fPreferVolatile)
4055 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4056 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4057 else
4058 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4059 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4060 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4061 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4062 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4063 }
4064 else
4065 {
4066 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4067 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4068 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4069 }
4070 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4071}
4072
4073
4074/**
4075 * Allocates a temporary register for loading an immediate value into.
4076 *
4077 * This will emit code to load the immediate, unless there happens to be an
4078 * unused register with the value already loaded.
4079 *
4080 * The caller will not modify the returned register, it must be considered
4081 * read-only. Free using iemNativeRegFreeTmpImm.
4082 *
4083 * @returns The host register number; throws VBox status code on failure, so no
4084 * need to check the return value.
4085 * @param pReNative The native recompile state.
4086 * @param poff Pointer to the variable with the code buffer position.
4087 * @param uImm The immediate value that the register must hold upon
4088 * return.
4089 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4090 * registers (@c true, default) or the other way around
4091 * (@c false).
4092 *
4093 * @note Reusing immediate values has not been implemented yet.
4094 */
4095DECL_HIDDEN_THROW(uint8_t)
4096iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4097{
4098 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4099 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4100 return idxReg;
4101}
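/*
 * Usage sketch (illustrative): load a constant into a read-only scratch register and
 * release it again with iemNativeRegFreeTmpImm as the doc comment above prescribes
 * (the exact signature of the free helper is assumed here):
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeef));
 *      // ... emit code that reads idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */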
4102
4103
4104/**
4105 * Allocates a temporary host general purpose register for keeping a guest
4106 * register value.
4107 *
4108 * Since we may already have a register holding the guest register value,
4109 * code will be emitted to do the loading if that's not the case. Code may also
4110 * be emitted if we have to free up a register to satisfy the request.
4111 *
4112 * @returns The host register number; throws VBox status code on failure, so no
4113 * need to check the return value.
4114 * @param pReNative The native recompile state.
4115 * @param poff Pointer to the variable with the code buffer
4116 * position. This will be updated if we need to move a
4117 * variable from register to stack in order to satisfy
4118 * the request.
4119 * @param enmGstReg The guest register that is to be updated.
4120 * @param enmIntendedUse How the caller will be using the host register.
4121 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4122 * register is okay (default). The ASSUMPTION here is
4123 * that the caller has already flushed all volatile
4124 * registers, so this is only applied if we allocate a
4125 * new register.
4126 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4127 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4128 */
4129DECL_HIDDEN_THROW(uint8_t)
4130iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4131 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4132 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4133{
4134 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4135#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4136 AssertMsg( fSkipLivenessAssert
4137 || pReNative->idxCurCall == 0
4138 || enmGstReg == kIemNativeGstReg_Pc
4139 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4140 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4141 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4142 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4143 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4144 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4145#endif
4146 RT_NOREF(fSkipLivenessAssert);
4147#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4148 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4149#endif
4150 uint32_t const fRegMask = !fNoVolatileRegs
4151 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4152 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4153
4154 /*
4155 * First check if the guest register value is already in a host register.
4156 */
4157 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4158 {
4159 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4160 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4161 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4162 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4163
4164 /* It's not supposed to be allocated... */
4165 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4166 {
4167 /*
4168 * If the register will trash the guest shadow copy, try to find a
4169 * completely unused register we can use instead. If that fails,
4170 * we need to disassociate the host reg from the guest reg.
4171 */
4172 /** @todo would be nice to know if preserving the register is in any way helpful. */
4173 /* If the purpose is calculations, try to duplicate the register value as
4174 we'll be clobbering the shadow. */
4175 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4176 && ( ~pReNative->Core.bmHstRegs
4177 & ~pReNative->Core.bmHstRegsWithGstShadow
4178 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4179 {
4180 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4181
4182 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4183
4184 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4185 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4186 g_apszIemNativeHstRegNames[idxRegNew]));
4187 idxReg = idxRegNew;
4188 }
4189 /* If the current register matches the restrictions, go ahead and allocate
4190 it for the caller. */
4191 else if (fRegMask & RT_BIT_32(idxReg))
4192 {
4193 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4194 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4195 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4196 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4197 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4198 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4199 else
4200 {
4201 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4202 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4203 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4204 }
4205 }
4206 /* Otherwise, allocate a register that satisfies the caller and transfer
4207 the shadowing if compatible with the intended use. (This basically
4208 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4209 else
4210 {
4211 Assert(fNoVolatileRegs);
4212 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4213 !fNoVolatileRegs
4214 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4215 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4216 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4217 {
4218 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4219 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4220 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4221 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4222 }
4223 else
4224 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4225 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4226 g_apszIemNativeHstRegNames[idxRegNew]));
4227 idxReg = idxRegNew;
4228 }
4229 }
4230 else
4231 {
4232 /*
4233 * Oops. Shadowed guest register already allocated!
4234 *
4235 * Allocate a new register, copy the value and, if updating, transfer the
4236 * guest shadow copy assignment to the new register.
4237 */
4238 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4239 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4240 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4241 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4242
4243 /** @todo share register for readonly access. */
4244 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4245 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4246
4247 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4248 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4249
4250 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4251 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4252 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4253 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4254 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4255 else
4256 {
4257 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4258 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4259 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4260 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4261 }
4262 idxReg = idxRegNew;
4263 }
4264 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4265
4266#ifdef VBOX_STRICT
4267 /* Strict builds: Check that the value is correct. */
4268 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4269#endif
4270
4271 return idxReg;
4272 }
4273
4274 /*
4275 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4276 */
4277 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4278
4279 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4280 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4281
4282 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4283 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4284 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4285 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4286
4287 return idxRegNew;
4288}
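/*
 * Usage sketch (illustrative): fetch the shadow copy of guest RAX for read-only use.
 * The cast is only needed because the enum arithmetic yields an int; releasing the
 * register afterwards would use the (assumed) iemNativeRegFreeTmp helper.
 *
 *      uint8_t const idxGprReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ReadOnly);
 */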
4289
4290
4291/**
4292 * Allocates a temporary host general purpose register that already holds the
4293 * given guest register value.
4294 *
4295 * The use case for this function is places where the shadowing state cannot be
4296 * modified due to branching and such. This will fail if we don't have a
4297 * current shadow copy handy or if it's incompatible. The only code that will
4298 * be emitted here is value checking code in strict builds.
4299 *
4300 * The intended use can only be readonly!
4301 *
4302 * @returns The host register number, UINT8_MAX if not present.
4303 * @param pReNative The native recompile state.
4304 * @param poff Pointer to the instruction buffer offset.
4305 * Will be updated in strict builds if a register is
4306 * found.
4307 * @param enmGstReg The guest register that is to be read.
4308 * @note In strict builds, this may throw instruction buffer growth failures.
4309 * Non-strict builds will not throw anything.
4310 * @sa iemNativeRegAllocTmpForGuestReg
4311 */
4312DECL_HIDDEN_THROW(uint8_t)
4313iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4314{
4315 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4316#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4317 AssertMsg( pReNative->idxCurCall == 0
4318 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4319 || enmGstReg == kIemNativeGstReg_Pc,
4320 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4321#endif
4322
4323 /*
4324 * First check if the guest register value is already in a host register.
4325 */
4326 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4327 {
4328 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4329 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4330 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4331 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4332
4333 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4334 {
4335 /*
4336 * We only do readonly use here, so easy compared to the other
4337 * variant of this code.
4338 */
4339 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4340 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4341 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4342 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4343 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4344
4345#ifdef VBOX_STRICT
4346 /* Strict builds: Check that the value is correct. */
4347 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4348#else
4349 RT_NOREF(poff);
4350#endif
4351 return idxReg;
4352 }
4353 }
4354
4355 return UINT8_MAX;
4356}
4357
4358
4359/**
4360 * Allocates argument registers for a function call.
4361 *
4362 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4363 * need to check the return value.
4364 * @param pReNative The native recompile state.
4365 * @param off The current code buffer offset.
4366 * @param cArgs The number of arguments the function call takes.
4367 */
4368DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4369{
4370 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4371 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4372 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4373 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4374
4375 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4376 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4377 else if (cArgs == 0)
4378 return off;
4379
4380 /*
4381 * Do we get lucky and all registers are free and not shadowing anything?
4382 */
4383 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4384 for (uint32_t i = 0; i < cArgs; i++)
4385 {
4386 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4387 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4388 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4389 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4390 }
4391 /*
4392 * Okay, not lucky so we have to free up the registers.
4393 */
4394 else
4395 for (uint32_t i = 0; i < cArgs; i++)
4396 {
4397 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4398 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4399 {
4400 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4401 {
4402 case kIemNativeWhat_Var:
4403 {
4404 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4406 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4407 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4408 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4409
4410 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4411 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4412 else
4413 {
4414 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4415 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4416 }
4417 break;
4418 }
4419
4420 case kIemNativeWhat_Tmp:
4421 case kIemNativeWhat_Arg:
4422 case kIemNativeWhat_rc:
4423 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4424 default:
4425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4426 }
4427
4428 }
4429 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4430 {
4431 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4432 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4433 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4434 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4435 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4436 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4437 }
4438 else
4439 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4440 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4441 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4442 }
4443 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4444 return off;
4445}
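/*
 * Editor's note, not part of the original source: a sketch of how the argument
 * allocator above fits into call emission.  The argument count and the code that
 * loads the registers are placeholders.
 *
 *      off = iemNativeRegAllocArgs(pReNative, off, 2);
 *      // g_aidxIemNativeCallRegs[0] and [1] are now marked kIemNativeWhat_Arg;
 *      // emit code loading the actual argument values into them before the call.
 */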
4446
4447
4448DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4449
4450
4451#if 0
4452/**
4453 * Frees a register assignment of any type.
4454 *
4455 * @param pReNative The native recompile state.
4456 * @param idxHstReg The register to free.
4457 *
4458 * @note Does not update variables.
4459 */
4460DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4461{
4462 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4463 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4464 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4465 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4466 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4467 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4468 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4469 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4470 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4471 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4472 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4473 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4474 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4475 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4476
4477 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4478 /* no flushing, right:
4479 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4480 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4481 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4482 */
4483}
4484#endif
4485
4486
4487/**
4488 * Frees a temporary register.
4489 *
4490 * Any shadow copies of guest registers assigned to the host register will not
4491 * be flushed by this operation.
4492 */
4493DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4494{
4495 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4496 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4497 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4498 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4499 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4500}
4501
4502
4503/**
4504 * Frees a temporary immediate register.
4505 *
4506 * It is assumed that the call has not modified the register, so it still holds
4507 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4508 */
4509DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4510{
4511 iemNativeRegFreeTmp(pReNative, idxHstReg);
4512}
4513
4514
4515/**
4516 * Frees a register assigned to a variable.
4517 *
4518 * The register will be disassociated from the variable.
4519 */
4520DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4521{
4522 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4523 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4524 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4526 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4527
4528 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4529 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4530 if (!fFlushShadows)
4531 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4532 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4533 else
4534 {
4535 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4536 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4537 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4538 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4539 uint64_t fGstRegShadows = fGstRegShadowsOld;
4540 while (fGstRegShadows)
4541 {
4542 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4543 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4544
4545 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4546 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4547 }
4548 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4549 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4550 }
4551}
4552
4553
4554/**
4555 * Called right before emitting a call instruction to move anything important
4556 * out of call-volatile registers, free and flush the call-volatile registers,
4557 * optionally freeing argument variables.
4558 *
4559 * @returns New code buffer offset; throws VBox status code on failure.
4560 * @param pReNative The native recompile state.
4561 * @param off The code buffer offset.
4562 * @param cArgs The number of arguments the function call takes.
4563 * It is presumed that the host register part of these has
4564 * been allocated as such already and won't need moving,
4565 * just freeing.
4566 * @param fKeepVars Mask of variables that should keep their register
4567 * assignments. Caller must take care to handle these.
4568 */
4569DECL_HIDDEN_THROW(uint32_t)
4570iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4571{
4572 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4573
4574 /* fKeepVars will reduce this mask. */
4575 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4576
4577 /*
4578 * Move anything important out of volatile registers.
4579 */
4580 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4581 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4582 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4583#ifdef IEMNATIVE_REG_FIXED_TMP0
4584 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4585#endif
4586#ifdef IEMNATIVE_REG_FIXED_TMP1
4587 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4588#endif
4589#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4590 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4591#endif
4592 & ~g_afIemNativeCallRegs[cArgs];
4593
4594 fRegsToMove &= pReNative->Core.bmHstRegs;
4595 if (!fRegsToMove)
4596 { /* likely */ }
4597 else
4598 {
4599 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4600 while (fRegsToMove != 0)
4601 {
4602 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4603 fRegsToMove &= ~RT_BIT_32(idxReg);
4604
4605 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4606 {
4607 case kIemNativeWhat_Var:
4608 {
4609 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4610 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4611 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4612 Assert(pVar->idxReg == idxReg);
4613 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4614 {
4615 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4616 idxVar, pVar->enmKind, pVar->idxReg));
4617 if (pVar->enmKind != kIemNativeVarKind_Stack)
4618 pVar->idxReg = UINT8_MAX;
4619 else
4620 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4621 }
4622 else
4623 fRegsToFree &= ~RT_BIT_32(idxReg);
4624 continue;
4625 }
4626
4627 case kIemNativeWhat_Arg:
4628 AssertMsgFailed(("What?!?: %u\n", idxReg));
4629 continue;
4630
4631 case kIemNativeWhat_rc:
4632 case kIemNativeWhat_Tmp:
4633 AssertMsgFailed(("Missing free: %u\n", idxReg));
4634 continue;
4635
4636 case kIemNativeWhat_FixedTmp:
4637 case kIemNativeWhat_pVCpuFixed:
4638 case kIemNativeWhat_pCtxFixed:
4639 case kIemNativeWhat_PcShadow:
4640 case kIemNativeWhat_FixedReserved:
4641 case kIemNativeWhat_Invalid:
4642 case kIemNativeWhat_End:
4643 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4644 }
4645 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4646 }
4647 }
4648
4649 /*
4650 * Do the actual freeing.
4651 */
4652 if (pReNative->Core.bmHstRegs & fRegsToFree)
4653 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4654 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4655 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4656
4657 /* If there are guest register shadows in any call-volatile register, we
4658 have to clear the corresponding guest register masks for each register. */
4659 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4660 if (fHstRegsWithGstShadow)
4661 {
4662 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4663 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4664 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4665 do
4666 {
4667 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4668 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4669
4670 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4671 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4672 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4673 } while (fHstRegsWithGstShadow != 0);
4674 }
4675
4676 return off;
4677}
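/*
 * Editor's note, not part of the original source: a sketch of using fKeepVars to
 * preserve one variable's register across a helper call.  cArgs and idxVarRet are
 * hypothetical caller-owned values (idxVarRet being a packed variable index).
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs,
 *                                                  RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarRet)));
 *      // ... emit the call; the caller remains responsible for the kept variable's
 *      //     register afterwards (see the fKeepVars description above) ...
 */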
4678
4679
4680/**
4681 * Flushes a set of guest register shadow copies.
4682 *
4683 * This is usually done after calling a threaded function or a C-implementation
4684 * of an instruction.
4685 *
4686 * @param pReNative The native recompile state.
4687 * @param fGstRegs Set of guest registers to flush.
4688 */
4689DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4690{
4691 /*
4692 * Reduce the mask by what's currently shadowed
4693 */
4694 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4695 fGstRegs &= bmGstRegShadowsOld;
4696 if (fGstRegs)
4697 {
4698 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4699 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4700 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4701 if (bmGstRegShadowsNew)
4702 {
4703 /*
4704 * Partial.
4705 */
4706 do
4707 {
4708 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4709 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4710 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4711 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4712 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4713
4714 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4715 fGstRegs &= ~fInThisHstReg;
4716 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4717 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4718 if (!fGstRegShadowsNew)
4719 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4720 } while (fGstRegs != 0);
4721 }
4722 else
4723 {
4724 /*
4725 * Clear all.
4726 */
4727 do
4728 {
4729 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4730 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4731 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4732 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4733 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4734
4735 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4736 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4737 } while (fGstRegs != 0);
4738 pReNative->Core.bmHstRegsWithGstShadow = 0;
4739 }
4740 }
4741}
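/*
 * Editor's note, not part of the original source: typical use of the flush above
 * right after a call into a C-implementation or threaded function that may have
 * modified guest registers behind our back.
 *
 *      // Drop every current shadow copy (the function masks this down to what is
 *      // actually shadowed):
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 *      // Or only a specific register, e.g. the PC shadow:
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
 */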
4742
4743
4744/**
4745 * Flushes guest register shadow copies held by a set of host registers.
4746 *
4747 * This is used with the TLB lookup code for ensuring that we don't carry on
4748 * with any guest shadows in volatile registers, as these will get corrupted by
4749 * a TLB miss.
4750 *
4751 * @param pReNative The native recompile state.
4752 * @param fHstRegs Set of host registers to flush guest shadows for.
4753 */
4754DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4755{
4756 /*
4757 * Reduce the mask by what's currently shadowed.
4758 */
4759 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4760 fHstRegs &= bmHstRegsWithGstShadowOld;
4761 if (fHstRegs)
4762 {
4763 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4764 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4765 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4766 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4767 if (bmHstRegsWithGstShadowNew)
4768 {
4769 /*
4770 * Partial (likely).
4771 */
4772 uint64_t fGstShadows = 0;
4773 do
4774 {
4775 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4776 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4777 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4778 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4779
4780 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4781 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4782 fHstRegs &= ~RT_BIT_32(idxHstReg);
4783 } while (fHstRegs != 0);
4784 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4785 }
4786 else
4787 {
4788 /*
4789 * Clear all.
4790 */
4791 do
4792 {
4793 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4794 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4795 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4796 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4797
4798 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4799 fHstRegs &= ~RT_BIT_32(idxHstReg);
4800 } while (fHstRegs != 0);
4801 pReNative->Core.bmGstRegShadows = 0;
4802 }
4803 }
4804}
4805
4806
4807/**
4808 * Restores guest shadow copies in volatile registers.
4809 *
4810 * This is used after calling a helper function (think TLB miss) to restore the
4811 * register state of volatile registers.
4812 *
4813 * @param pReNative The native recompile state.
4814 * @param off The code buffer offset.
4815 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4816 * be active (allocated) w/o asserting. Hack.
4817 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4818 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4819 */
4820DECL_HIDDEN_THROW(uint32_t)
4821iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4822{
4823 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4824 if (fHstRegs)
4825 {
4826 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4827 do
4828 {
4829 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4830
4831 /* It's not fatal if a register is active holding a variable that
4832 shadows a guest register, ASSUMING all pending guest register
4833 writes were flushed prior to the helper call. However, we'll be
4834 emitting duplicate restores, so it wastes code space. */
4835 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4836 RT_NOREF(fHstRegsActiveShadows);
4837
4838 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4839 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4840 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4841 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4842
4843 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4844 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4845
4846 fHstRegs &= ~RT_BIT_32(idxHstReg);
4847 } while (fHstRegs != 0);
4848 }
4849 return off;
4850}
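/*
 * Editor's note, not part of the original source: a sketch of the TLB-miss helper
 * pattern the function above is meant for.  The helper call emission itself is
 * elided.
 *
 *      // ... emit the TLB-miss helper call; the callee clobbers all call-volatile
 *      //     host registers, but the shadow bookkeeping is left intact ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 *      // Every guest value still marked as shadowed by a call-volatile register is
 *      // now reloaded from CPUMCTX, so the shadows are valid again.
 */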
4851
4852
4853
4854
4855/*********************************************************************************************************************************
4856* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4857*********************************************************************************************************************************/
4858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4859
4860/**
4861 * Info about shadowed guest SIMD register values.
4862 * @see IEMNATIVEGSTSIMDREG
4863 */
4864static struct
4865{
4866 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4867 uint32_t offXmm;
4868 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4869 uint32_t offYmm;
4870 /** Name (for logging). */
4871 const char *pszName;
4872} const g_aGstSimdShadowInfo[] =
4873{
4874#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4875 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4876 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4877 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4878 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4879 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4880 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4881 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4882 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4883 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4884 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4885 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4886 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4887 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4888 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4889 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4890 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4891 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4892#undef CPUMCTX_OFF_AND_SIZE
4893};
4894AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
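/*
 * Editor's note, not part of the original source: the offsets in the table above
 * are consumed by the VCpu load/store emitters, e.g. when flushing a dirty low
 * 128-bit half (see iemNativeSimdRegFlushPendingWrite() below).  idxHstSimdReg and
 * enmGstSimdReg are hypothetical caller locals.
 *
 *      off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
 *                                                      g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
 */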
4895
4896
4897#ifdef LOG_ENABLED
4898/** Host CPU SIMD register names. */
4899DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4900{
4901#ifdef RT_ARCH_AMD64
4902 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4903#elif RT_ARCH_ARM64
4904 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4905 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4906#else
4907# error "port me"
4908#endif
4909};
4910#endif
4911
4912
4913/**
4914 * Frees a temporary SIMD register.
4915 *
4916 * Any shadow copies of guest registers assigned to the host register will not
4917 * be flushed by this operation.
4918 */
4919DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4920{
4921 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4922 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4923 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4924 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4925 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4926}
4927
4928
4929/**
4930 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
4931 *
4932 * @returns New code buffer offset.
4933 * @param pReNative The native recompile state.
4934 * @param off Current code buffer position.
4935 * @param enmGstSimdReg The guest SIMD register to flush.
4936 */
4937DECL_HIDDEN_THROW(uint32_t)
4938iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4939{
4940 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4941
4942 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4943 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4944 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4945 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4946
4947 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4948 {
4949 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4950 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4951 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4952 }
4953
4954 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4955 {
4956 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4957 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4958 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4959 }
4960
4961 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4962 return off;
4963}
4964
4965
4966/**
4967 * Locate a register, possibly freeing one up.
4968 *
4969 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4970 * failed.
4971 *
4972 * @returns Host register number on success. Returns UINT8_MAX if no registers are
4973 * found; the caller is supposed to deal with this and raise an
4974 * allocation type specific status code (if desired).
4975 *
4976 * @throws VBox status code if we run into trouble spilling a variable or
4977 * recording debug info. Does NOT throw anything if we're out of
4978 * registers, though.
4979 */
4980static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4981 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4982{
4983 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4984 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4985 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4986
4987 /*
4988 * Try a freed register that's shadowing a guest register.
4989 */
4990 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4991 if (fRegs)
4992 {
4993 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4994
4995#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4996 /*
4997 * When we have liveness information, we use it to kick out all shadowed
4998 * guest registers that will not be needed any more in this TB. If we're
4999 * lucky, this may prevent us from ending up here again.
5000 *
5001 * Note! We must consider the previous entry here so we don't free
5002 * anything that the current threaded function requires (current
5003 * entry is produced by the next threaded function).
5004 */
5005 uint32_t const idxCurCall = pReNative->idxCurCall;
5006 if (idxCurCall > 0)
5007 {
5008 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5009
5010# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5011 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5012 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5013 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5014 # else
5015 /* Construct a mask of the registers not in the read or write state.
5016 Note! We could skip writes, if they aren't from us, as this is just
5017 a hack to prevent trashing registers that have just been written
5018 or will be written when we retire the current instruction. */
5019 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5020 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5021 & IEMLIVENESSBIT_MASK;
5022 # endif
5023 /* If it matches any shadowed registers. */
5024 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5025 {
5026 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5027 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5028 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5029
5030 /* See if we've got any unshadowed registers we can return now. */
5031 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5032 if (fUnshadowedRegs)
5033 {
5034 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5035 return (fPreferVolatile
5036 ? ASMBitFirstSetU32(fUnshadowedRegs)
5037 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5038 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5039 - 1;
5040 }
5041 }
5042 }
5043#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5044
5045 unsigned const idxReg = (fPreferVolatile
5046 ? ASMBitFirstSetU32(fRegs)
5047 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5048 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5049 - 1;
5050
5051 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5052 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5053 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5054 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5055
5056 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5057 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5058 uint32_t idxGstSimdReg = 0;
5059 do
5060 {
5061 if (fGstRegShadows & 0x1)
5062 {
5063 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5064 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5065 }
5066 idxGstSimdReg++;
5067 fGstRegShadows >>= 1;
5068 } while (fGstRegShadows);
5069
5070 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5071 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5072 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5073 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5074 return idxReg;
5075 }
5076
5077 /*
5078 * Try to free up a variable that's in a register.
5079 *
5080 * We do two rounds here, first evacuating variables we don't need to be
5081 * saved on the stack, then in the second round moving things to the stack.
5082 */
5083 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5084 AssertReleaseFailed(); /** @todo No variable support right now. */
5085#if 0
5086 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5087 {
5088 uint32_t fVars = pReNative->Core.bmSimdVars;
5089 while (fVars)
5090 {
5091 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5092 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5093 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5094 && (RT_BIT_32(idxReg) & fRegMask)
5095 && ( iLoop == 0
5096 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5097 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5098 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5099 {
5100 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5101 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5102 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5103 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5104 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5105 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5106
5107 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5108 {
5109 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5110 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5111 }
5112
5113 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5114 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5115
5116 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5117 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5118 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5119 return idxReg;
5120 }
5121 fVars &= ~RT_BIT_32(idxVar);
5122 }
5123 }
5124#endif
5125
5126 AssertFailed();
5127 return UINT8_MAX;
5128}
5129
5130
5131/**
5132 * Flushes a set of guest SIMD register shadow copies.
5133 *
5134 * This is usually done after calling a threaded function or a C-implementation
5135 * of an instruction.
5136 *
5137 * @param pReNative The native recompile state.
5138 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5139 */
5140DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5141{
5142 /*
5143 * Reduce the mask by what's currently shadowed
5144 */
5145 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5146 fGstSimdRegs &= bmGstSimdRegShadows;
5147 if (fGstSimdRegs)
5148 {
5149 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5150 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5151 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5152 if (bmGstSimdRegShadowsNew)
5153 {
5154 /*
5155 * Partial.
5156 */
5157 do
5158 {
5159 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5160 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5161 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5162 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5163 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5164 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5165
5166 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5167 fGstSimdRegs &= ~fInThisHstReg;
5168 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5169 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5170 if (!fGstRegShadowsNew)
5171 {
5172 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5173 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5174 }
5175 } while (fGstSimdRegs != 0);
5176 }
5177 else
5178 {
5179 /*
5180 * Clear all.
5181 */
5182 do
5183 {
5184 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5185 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5186 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5187 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5188 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5189 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5190
5191 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5192 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5193 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5194 } while (fGstSimdRegs != 0);
5195 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5196 }
5197 }
5198}
5199
5200
5201/**
5202 * Allocates a temporary host SIMD register.
5203 *
5204 * This may emit code to save register content onto the stack in order to free
5205 * up a register.
5206 *
5207 * @returns The host register number; throws VBox status code on failure,
5208 * so no need to check the return value.
5209 * @param pReNative The native recompile state.
5210 * @param poff Pointer to the variable with the code buffer position.
5211 * This will be updated if we need to move a variable from
5212 * register to stack in order to satisfy the request.
5213 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5214 * registers (@c true, default) or the other way around
5215 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5216 */
5217DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5218{
5219 /*
5220 * Try find a completely unused register, preferably a call-volatile one.
5221 */
5222 uint8_t idxSimdReg;
5223 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5224 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5225 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5226 if (fRegs)
5227 {
5228 if (fPreferVolatile)
5229 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5230 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5231 else
5232 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5233 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5234 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5235 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5236 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5237 }
5238 else
5239 {
5240 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5241 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5242 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5243 }
5244
5245 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5246 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5247}
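/*
 * Editor's note, not part of the original source: the basic scratch-register
 * pattern for the allocator above, assuming pReNative and off are in scope.
 *
 *      uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit SIMD code using the scratch register ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxTmpSimdReg);
 */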
5248
5249
5250/**
5251 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5252 * registers.
5253 *
5254 * @returns The host register number; throws VBox status code on failure,
5255 * so no need to check the return value.
5256 * @param pReNative The native recompile state.
5257 * @param poff Pointer to the variable with the code buffer position.
5258 * This will be updated if we need to move a variable from
5259 * register to stack in order to satisfy the request.
5260 * @param fRegMask Mask of acceptable registers.
5261 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5262 * registers (@c true, default) or the other way around
5263 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5264 */
5265DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5266 bool fPreferVolatile /*= true*/)
5267{
5268 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5269 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5270
5271 /*
5272 * Try find a completely unused register, preferably a call-volatile one.
5273 */
5274 uint8_t idxSimdReg;
5275 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5276 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5277 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5278 & fRegMask;
5279 if (fRegs)
5280 {
5281 if (fPreferVolatile)
5282 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5283 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5284 else
5285 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5286 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5287 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5288 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5289 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5290 }
5291 else
5292 {
5293 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5294 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5295 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5296 }
5297
5298 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5299 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5300}
5301
5302
5303/**
5304 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5305 *
5306 * @param pReNative The native recompile state.
5307 * @param idxHstSimdReg The host SIMD register to update the state for.
5308 * @param enmLoadSz The load size to set.
5309 */
5310DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5311 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5312{
5313 /* Everything valid already? -> nothing to do. */
5314 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5315 return;
5316
5317 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5318 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5319 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5320 {
5321 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5322 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5323 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5324 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5325 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5326 }
5327}
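/*
 * Editor's note, not part of the original source: how the load-size state above
 * combines.  idxHstSimdReg is a hypothetical host SIMD register index.
 *
 *      // enmLoaded: Invalid -> Low128 after the first call, then Low128 + High128 -> 256:
 *      iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
 *      iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
 *      // aHstSimdRegs[idxHstSimdReg].enmLoaded is now kIemNativeGstSimdRegLdStSz_256.
 */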
5328
5329
5330static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5331 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5332{
5333 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5334 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5335 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5336 {
5337# ifdef RT_ARCH_ARM64
5338 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5339 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5340# endif
5341
5342 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5343 {
5344 switch (enmLoadSzDst)
5345 {
5346 case kIemNativeGstSimdRegLdStSz_256:
5347 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5348 break;
5349 case kIemNativeGstSimdRegLdStSz_Low128:
5350 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5351 break;
5352 case kIemNativeGstSimdRegLdStSz_High128:
5353 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5354 break;
5355 default:
5356 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5357 }
5358
5359 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5360 }
5361 }
5362 else
5363 {
5364 /* Complicated stuff where the source is currently missing something, later. */
5365 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5366 }
5367
5368 return off;
5369}
5370
5371
5372/**
5373 * Allocates a temporary host SIMD register for keeping a guest
5374 * SIMD register value.
5375 *
5376 * Since we may already have a register holding the guest register value,
5377 * code will be emitted to do the loading if that's not the case. Code may also
5378 * be emitted if we have to free up a register to satisfy the request.
5379 *
5380 * @returns The host register number; throws VBox status code on failure, so no
5381 * need to check the return value.
5382 * @param pReNative The native recompile state.
5383 * @param poff Pointer to the variable with the code buffer
5384 * position. This will be updated if we need to move a
5385 * variable from register to stack in order to satisfy
5386 * the request.
5387 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5388 * @param enmIntendedUse How the caller will be using the host register.
5389 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5390 * register is okay (default). The ASSUMPTION here is
5391 * that the caller has already flushed all volatile
5392 * registers, so this is only applied if we allocate a
5393 * new register.
5394 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5395 */
5396DECL_HIDDEN_THROW(uint8_t)
5397iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5398 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5399 bool fNoVolatileRegs /*= false*/)
5400{
5401 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5402#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5403 AssertMsg( pReNative->idxCurCall == 0
5404 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5405 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5406 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5407 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5408 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5409 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5410#endif
5411#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5412 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5413#endif
5414 uint32_t const fRegMask = !fNoVolatileRegs
5415 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5416 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5417
5418 /*
5419 * First check if the guest register value is already in a host register.
5420 */
5421 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5422 {
5423 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5424 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5425 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5426 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5427
5428 /* It's not supposed to be allocated... */
5429 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5430 {
5431 /*
5432 * If the register will trash the guest shadow copy, try find a
5433 * completely unused register we can use instead. If that fails,
5434 * we need to disassociate the host reg from the guest reg.
5435 */
5436 /** @todo would be nice to know if preserving the register is in any way helpful. */
5437 /* If the purpose is calculations, try duplicating the register value as
5438 we'll be clobbering the shadow. */
5439 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5440 && ( ~pReNative->Core.bmHstSimdRegs
5441 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5442 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5443 {
5444 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5445
5446 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5447
5448 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5449 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5450 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5451 idxSimdReg = idxRegNew;
5452 }
5453 /* If the current register matches the restrictions, go ahead and allocate
5454 it for the caller. */
5455 else if (fRegMask & RT_BIT_32(idxSimdReg))
5456 {
5457 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5458 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5459 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5460 {
5461 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5462 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5463 else
5464 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5465 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5466 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5467 }
5468 else
5469 {
5470 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5471 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5472 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5473 }
5474 }
5475 /* Otherwise, allocate a register that satisfies the caller and transfer
5476 the shadowing if compatible with the intended use. (This basically
5477 means the call wants a non-volatile register (RSP push/pop scenario).) */
5478 else
5479 {
5480 Assert(fNoVolatileRegs);
5481 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5482 !fNoVolatileRegs
5483 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5484 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5485 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5486 {
5487 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5488 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
5489 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5490 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5491 }
5492 else
5493 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5494 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5495 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5496 idxSimdReg = idxRegNew;
5497 }
5498 }
5499 else
5500 {
5501 /*
5502 * Oops. Shadowed guest register already allocated!
5503 *
5504 * Allocate a new register, copy the value and, if updating, the
5505 * guest shadow copy assignment to the new register.
5506 */
5507 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5508 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5509 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5510 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5511
5512 /** @todo share register for readonly access. */
5513 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5514 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5515
5516 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5517 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5518 else
5519 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5520
5521 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5522 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5523 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5524 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5525 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5526 else
5527 {
5528 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5529 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5530 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5531 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5532 }
5533 idxSimdReg = idxRegNew;
5534 }
5535 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5536
5537#ifdef VBOX_STRICT
5538 /* Strict builds: Check that the value is correct. */
5539 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5540 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5541#endif
5542
5543 return idxSimdReg;
5544 }
5545
5546 /*
5547 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5548 */
5549 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5550
5551 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5552 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5553 else
5554 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5555
5556 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5557 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5558
5559 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5560 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5561
5562 return idxRegNew;
5563}
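/*
 * Editor's note, not part of the original source: a read-only usage sketch for the
 * allocator above.  iXReg is a hypothetical guest XMM/YMM register index supplied
 * by the caller.
 *
 *      uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                            IEMNATIVEGSTSIMDREG_SIMD(iXReg),
 *                                                                            kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                            kIemNativeGstRegUse_ReadOnly);
 *      // ... emit code reading the low 128 bits (the XMM part) of guest register iXReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxHstSimdReg);
 */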
5564
5565#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5566
5567
5568
5569/*********************************************************************************************************************************
5570* Code emitters for flushing pending guest register writes and sanity checks *
5571*********************************************************************************************************************************/
5572
5573#ifdef VBOX_STRICT
5574/**
5575 * Does internal register allocator sanity checks.
5576 */
5577DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5578{
5579 /*
5580 * Iterate host registers building a guest shadowing set.
5581 */
5582 uint64_t bmGstRegShadows = 0;
5583 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5584 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5585 while (bmHstRegsWithGstShadow)
5586 {
5587 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5588 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5589 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5590
5591 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5592 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5593 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5594 bmGstRegShadows |= fThisGstRegShadows;
5595 while (fThisGstRegShadows)
5596 {
5597 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5598 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5599 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5600 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5601 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5602 }
5603 }
5604 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5605 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5606 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5607
5608 /*
5609 * Now the other way around, checking the guest to host index array.
5610 */
5611 bmHstRegsWithGstShadow = 0;
5612 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5613 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5614 while (bmGstRegShadows)
5615 {
5616 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5617 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5618 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5619
5620 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5621 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5622 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5623 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5624 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5625 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5626 }
5627 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5628 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5629 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5630}
5631#endif /* VBOX_STRICT */
5632
5633
5634/**
5635 * Flushes any delayed guest register writes.
5636 *
5637 * This must be called prior to calling CImpl functions and any helpers that use
5638 * the guest state (like raising exceptions) and such.
5639 *
5640 * Currently only delayed RIP updates (IEMNATIVE_WITH_DELAYED_PC_UPDATING) and dirty
5641 * SIMD register shadows (IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) are delayed, so those are what gets flushed here.
5642 */
5643DECL_HIDDEN_THROW(uint32_t)
5644iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5645{
5646#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5647    if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5648 off = iemNativeEmitPcWriteback(pReNative, off);
5649#else
5650 RT_NOREF(pReNative, fGstShwExcept);
5651#endif
5652
5653#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5654 /** @todo r=bird: There must be a quicker way to check if anything needs
5655 * doing and then call simd function to do the flushing */
5656 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5657 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5658 {
5659 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5660 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5661
5662 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5663 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5664
5665 if ( fFlushShadows
5666 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5667 {
5668 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5669
5670 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5671 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5672 }
5673 }
5674#else
5675 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5676#endif
5677
5678 return off;
5679}
5680
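/* Illustrative usage sketch, not part of the build: emitters flush delayed guest register
   writes before generating anything that reads CPUMCTX or may raise an exception, as the
   #GP(0) checks and the CImpl/threaded call emitters further down do.  The names below are
   the recompiler state and code offset parameters used throughout this file. */
#if 0
    off = iemNativeRegFlushPendingWrites(pReNative, off);  /* writes back the delayed RIP update, etc. */
    /* ...now it is safe to emit a helper call that inspects or modifies the guest context... */
#endif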
5681
5682#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5683/**
5684 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5685 */
5686DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5687{
5688 Assert(pReNative->Core.offPc);
5689# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5690 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5691 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5692# endif
5693
5694# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5695 /* Allocate a temporary PC register. */
5696 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5697
5698 /* Perform the addition and store the result. */
5699 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5700 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5701
5702 /* Free but don't flush the PC register. */
5703 iemNativeRegFreeTmp(pReNative, idxPcReg);
5704# else
5705 /* Compare the shadow with the context value, they should match. */
5706 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5707 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5708# endif
5709
5710 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5711 pReNative->Core.offPc = 0;
5712 pReNative->Core.cInstrPcUpdateSkipped = 0;
5713
5714 return off;
5715}
5716#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5717
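/* Conceptual sketch, not part of the build: with IEMNATIVE_WITH_DELAYED_PC_UPDATING the
   writeback above is the native equivalent of doing the following once for a whole run of
   recompiled instructions instead of after every single one of them: */
#if 0
    pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;    /* offPc = sum of the skipped instruction lengths */
    /* (offPc and cInstrPcUpdateSkipped are reset at recompile time, see above.) */
#endif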
5718
5719/*********************************************************************************************************************************
5720* Code Emitters (larger snippets) *
5721*********************************************************************************************************************************/
5722
5723/**
5724 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5725 * extending to 64-bit width.
5726 *
5727 * @returns New code buffer offset; throws VBox status code on error.
5728 * @param   pReNative   The native recompile state.
5729 * @param off The current code buffer position.
5730 * @param idxHstReg The host register to load the guest register value into.
5731 * @param enmGstReg The guest register to load.
5732 *
5733 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5734 * that is something the caller needs to do if applicable.
5735 */
5736DECL_HIDDEN_THROW(uint32_t)
5737iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5738{
5739 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5740 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5741
5742 switch (g_aGstShadowInfo[enmGstReg].cb)
5743 {
5744 case sizeof(uint64_t):
5745 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5746 case sizeof(uint32_t):
5747 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5748 case sizeof(uint16_t):
5749 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5750#if 0 /* not present in the table. */
5751 case sizeof(uint8_t):
5752 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5753#endif
5754 default:
5755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5756 }
5757}
5758
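/* Illustrative usage sketch, not part of the build: load a guest register value into a host
   register without establishing shadowing, e.g. into the fixed temporary for a strict-mode
   value check (this is how iemNativeEmitGuestRegValueCheck() below uses it on ARM64): */
#if 0
    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
#endif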
5759
5760#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5761/**
5762 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5763 *
5764 * @returns New code buffer offset; throws VBox status code on error.
5765 * @param pReNative The recompiler state.
5766 * @param off The current code buffer position.
5767 * @param idxHstSimdReg The host register to load the guest register value into.
5768 * @param enmGstSimdReg The guest register to load.
5769 * @param enmLoadSz The load size of the register.
5770 *
5771 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5772 *       that is something the caller needs to do if applicable.
5773 */
5774DECL_HIDDEN_THROW(uint32_t)
5775iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5776 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5777{
5778 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5779
5780 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5781 switch (enmLoadSz)
5782 {
5783 case kIemNativeGstSimdRegLdStSz_256:
5784 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5785 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5786 case kIemNativeGstSimdRegLdStSz_Low128:
5787 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5788 case kIemNativeGstSimdRegLdStSz_High128:
5789 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5790 default:
5791 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5792 }
5793}
5794#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5795
5796#ifdef VBOX_STRICT
5797
5798/**
5799 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5800 *
5801 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5802 * Trashes EFLAGS on AMD64.
5803 */
5804DECL_HIDDEN_THROW(uint32_t)
5805iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5806{
5807# ifdef RT_ARCH_AMD64
5808 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5809
5810 /* rol reg64, 32 */
5811 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5812 pbCodeBuf[off++] = 0xc1;
5813 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5814 pbCodeBuf[off++] = 32;
5815
5816 /* test reg32, ffffffffh */
5817 if (idxReg >= 8)
5818 pbCodeBuf[off++] = X86_OP_REX_B;
5819 pbCodeBuf[off++] = 0xf7;
5820 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5821 pbCodeBuf[off++] = 0xff;
5822 pbCodeBuf[off++] = 0xff;
5823 pbCodeBuf[off++] = 0xff;
5824 pbCodeBuf[off++] = 0xff;
5825
5826 /* je/jz +1 */
5827 pbCodeBuf[off++] = 0x74;
5828 pbCodeBuf[off++] = 0x01;
5829
5830 /* int3 */
5831 pbCodeBuf[off++] = 0xcc;
5832
5833 /* rol reg64, 32 */
5834 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5835 pbCodeBuf[off++] = 0xc1;
5836 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5837 pbCodeBuf[off++] = 32;
5838
5839# elif defined(RT_ARCH_ARM64)
5840 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5841 /* lsr tmp0, reg64, #32 */
5842 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5843 /* cbz tmp0, +1 */
5844 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5845 /* brk #0x1100 */
5846 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5847
5848# else
5849# error "Port me!"
5850# endif
5851 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5852 return off;
5853}
5854
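/* Illustrative sketch, not part of the build: what the emitted strict check above boils down
   to at runtime, with uRegValue standing in for the 64-bit value of host register @a idxReg. */
# if 0
    if ((uRegValue >> 32) != 0)
        RT_BREAKPOINT();    /* emitted as int3 on AMD64 and brk #0x1100 on ARM64 */
# endif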
5855
5856/**
5857 * Emits code that checks that the content of register @a idxReg is the same
5858 * as what's in the guest register @a enmGstReg, emitting a breakpoint
5859 * instruction if that's not the case.
5860 *
5861 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5862 * Trashes EFLAGS on AMD64.
5863 */
5864DECL_HIDDEN_THROW(uint32_t)
5865iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5866{
5867# ifdef RT_ARCH_AMD64
5868 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5869
5870 /* cmp reg, [mem] */
5871 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5872 {
5873 if (idxReg >= 8)
5874 pbCodeBuf[off++] = X86_OP_REX_R;
5875 pbCodeBuf[off++] = 0x38;
5876 }
5877 else
5878 {
5879 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5880 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5881 else
5882 {
5883 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5884 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5885 else
5886 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5887 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5888 if (idxReg >= 8)
5889 pbCodeBuf[off++] = X86_OP_REX_R;
5890 }
5891 pbCodeBuf[off++] = 0x39;
5892 }
5893 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5894
5895 /* je/jz +1 */
5896 pbCodeBuf[off++] = 0x74;
5897 pbCodeBuf[off++] = 0x01;
5898
5899 /* int3 */
5900 pbCodeBuf[off++] = 0xcc;
5901
5902 /* For values smaller than the register size, we must check that the rest
5903 of the register is all zeros. */
5904 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5905 {
5906 /* test reg64, imm32 */
5907 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5908 pbCodeBuf[off++] = 0xf7;
5909 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5910 pbCodeBuf[off++] = 0;
5911 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5912 pbCodeBuf[off++] = 0xff;
5913 pbCodeBuf[off++] = 0xff;
5914
5915 /* je/jz +1 */
5916 pbCodeBuf[off++] = 0x74;
5917 pbCodeBuf[off++] = 0x01;
5918
5919 /* int3 */
5920 pbCodeBuf[off++] = 0xcc;
5921 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5922 }
5923 else
5924 {
5925 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5926 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5927 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5928 }
5929
5930# elif defined(RT_ARCH_ARM64)
5931 /* mov TMP0, [gstreg] */
5932 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5933
5934 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5935 /* sub tmp0, tmp0, idxReg */
5936 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5937 /* cbz tmp0, +1 */
5938 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5939 /* brk #0x1000+enmGstReg */
5940 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5942
5943# else
5944# error "Port me!"
5945# endif
5946 return off;
5947}
5948
5949
5950# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5951/**
5952 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
5953 * as what's in the guest register @a enmGstSimdReg, emitting a breakpoint
5954 * instruction if that's not the case.
5955 *
5956 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5957 * Trashes EFLAGS on AMD64.
5958 */
5959DECL_HIDDEN_THROW(uint32_t)
5960iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5961 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5962{
5963    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5964 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5965 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5966 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5967 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5968 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5969 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5970 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5971 return off;
5972
5973# ifdef RT_ARCH_AMD64
5974 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
5975
5976 /* movdqa vectmp0, idxSimdReg */
5977 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5978
5979 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5980
5981 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5982 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5983 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
5984 pbCodeBuf[off++] = X86_OP_REX_R;
5985 pbCodeBuf[off++] = 0x0f;
5986 pbCodeBuf[off++] = 0x38;
5987 pbCodeBuf[off++] = 0x29;
5988 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5989
5990 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5991 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5992 pbCodeBuf[off++] = X86_OP_REX_W
5993 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
5994 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5995 pbCodeBuf[off++] = 0x0f;
5996 pbCodeBuf[off++] = 0x3a;
5997 pbCodeBuf[off++] = 0x16;
5998 pbCodeBuf[off++] = 0xeb;
5999 pbCodeBuf[off++] = 0x00;
6000
6001 /* cmp tmp0, 0xffffffffffffffff. */
6002 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6003 pbCodeBuf[off++] = 0x83;
6004 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6005 pbCodeBuf[off++] = 0xff;
6006
6007 /* je/jz +1 */
6008 pbCodeBuf[off++] = 0x74;
6009 pbCodeBuf[off++] = 0x01;
6010
6011 /* int3 */
6012 pbCodeBuf[off++] = 0xcc;
6013
6014 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6015 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6016 pbCodeBuf[off++] = X86_OP_REX_W
6017 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6018 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6019 pbCodeBuf[off++] = 0x0f;
6020 pbCodeBuf[off++] = 0x3a;
6021 pbCodeBuf[off++] = 0x16;
6022 pbCodeBuf[off++] = 0xeb;
6023 pbCodeBuf[off++] = 0x01;
6024
6025 /* cmp tmp0, 0xffffffffffffffff. */
6026 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6027 pbCodeBuf[off++] = 0x83;
6028 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6029 pbCodeBuf[off++] = 0xff;
6030
6031 /* je/jz +1 */
6032 pbCodeBuf[off++] = 0x74;
6033 pbCodeBuf[off++] = 0x01;
6034
6035 /* int3 */
6036 pbCodeBuf[off++] = 0xcc;
6037
6038# elif defined(RT_ARCH_ARM64)
6039 /* mov vectmp0, [gstreg] */
6040 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6041
6042 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6043 {
6044 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6045 /* eor vectmp0, vectmp0, idxSimdReg */
6046 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6047        /* cnt vectmp0, vectmp0 */
6048 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6049 /* umov tmp0, vectmp0.D[0] */
6050 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6051 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6052 /* cbz tmp0, +1 */
6053 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6054 /* brk #0x1000+enmGstReg */
6055 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6056 }
6057
6058 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6059 {
6060 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6061 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6062 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6063        /* cnt vectmp0 + 1, vectmp0 + 1 */
6064 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6065 /* umov tmp0, (vectmp0 + 1).D[0] */
6066 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6067 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6068 /* cbz tmp0, +1 */
6069 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6070 /* brk #0x1000+enmGstReg */
6071 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6072 }
6073
6074# else
6075# error "Port me!"
6076# endif
6077
6078 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6079 return off;
6080}
6081# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6082
6083
6084/**
6085 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
6086 * important bits.
6087 *
6088 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6089 * Trashes EFLAGS on AMD64.
6090 */
6091DECL_HIDDEN_THROW(uint32_t)
6092iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6093{
6094 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6095 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6096 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6097 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6098
6099#ifdef RT_ARCH_AMD64
6100 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6101
6102 /* je/jz +1 */
6103 pbCodeBuf[off++] = 0x74;
6104 pbCodeBuf[off++] = 0x01;
6105
6106 /* int3 */
6107 pbCodeBuf[off++] = 0xcc;
6108
6109# elif defined(RT_ARCH_ARM64)
6110 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6111
6112 /* b.eq +1 */
6113 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6114 /* brk #0x2000 */
6115 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6116
6117# else
6118# error "Port me!"
6119# endif
6120 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6121
6122 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6123 return off;
6124}
6125
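/* Illustrative sketch, not part of the build: the C-level equivalent of the strict check
   emitted above, using the fExec parameter and the IEMCPU::fExec field loaded there. */
# if 0
    if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
        != (fExec & IEMTB_F_KEY_MASK))
        RT_BREAKPOINT();    /* emitted as int3 on AMD64 and brk #0x2000 on ARM64 */
# endif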
6126#endif /* VBOX_STRICT */
6127
6128
6129#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6130/**
6131 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6132 */
6133DECL_HIDDEN_THROW(uint32_t)
6134iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6135{
6136 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6137
6138 fEflNeeded &= X86_EFL_STATUS_BITS;
6139 if (fEflNeeded)
6140 {
6141# ifdef RT_ARCH_AMD64
6142 /* test dword [pVCpu + offVCpu], imm32 */
6143 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6144 if (fEflNeeded <= 0xff)
6145 {
6146 pCodeBuf[off++] = 0xf6;
6147 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6148 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6149 }
6150 else
6151 {
6152 pCodeBuf[off++] = 0xf7;
6153 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6154 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6155 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6156 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6157 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6158 }
6159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6160
6161# else
6162 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6163 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6164 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6165# ifdef RT_ARCH_ARM64
6166 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6167 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6168# else
6169# error "Port me!"
6170# endif
6171 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6172# endif
6173 }
6174 return off;
6175}
6176#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6177
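/* Illustrative sketch, not part of the build: with IEMNATIVE_STRICT_EFLAGS_SKIPPING the code
   emitted above performs the equivalent of the following check on IEMCPU::fSkippingEFlags. */
#if 0
    if (pVCpu->iem.s.fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS)
        RT_BREAKPOINT();    /* a status flag we need was skipped; emitted as int3 / brk #0x7777 */
#endif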
6178
6179/**
6180 * Emits code for checking the return code of a call and rcPassUp, returning
6181 * from the code if either is non-zero.
6182 */
6183DECL_HIDDEN_THROW(uint32_t)
6184iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6185{
6186#ifdef RT_ARCH_AMD64
6187 /*
6188 * AMD64: eax = call status code.
6189 */
6190
6191 /* edx = rcPassUp */
6192 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6193# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6194 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6195# endif
6196
6197 /* edx = eax | rcPassUp */
6198 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6199 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6200 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6201 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6202
6203 /* Jump to non-zero status return path. */
6204 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6205
6206 /* done. */
6207
6208#elif RT_ARCH_ARM64
6209 /*
6210 * ARM64: w0 = call status code.
6211 */
6212# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6213 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6214# endif
6215 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6216
6217 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6218
6219 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6220
6221 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6222 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6223 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6224
6225#else
6226# error "port me"
6227#endif
6228 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6229 RT_NOREF_PV(idxInstr);
6230 return off;
6231}
6232
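/* Illustrative sketch, not part of the build: what the emitted sequence above amounts to,
   with rcCall standing in for the status code returned in eax / w0 by the preceding call.
   The NonZeroRetOrPassUp label ends up in iemNativeEmitRcFiddling() below, which calls the
   status code fiddling helper. */
#if 0
    if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
        return iemNativeHlpExecStatusCodeFiddling(pVCpu, rcCall, idxInstr);
#endif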
6233
6234/**
6235 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6236 * raising a \#GP(0) if it isn't.
6237 *
6238 * @returns New code buffer offset; throws VBox status code on error.
6239 * @param pReNative The native recompile state.
6240 * @param off The code buffer offset.
6241 * @param idxAddrReg The host register with the address to check.
6242 * @param idxInstr The current instruction.
6243 */
6244DECL_HIDDEN_THROW(uint32_t)
6245iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6246{
6247 /*
6248 * Make sure we don't have any outstanding guest register writes as we may
6249     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6250 */
6251 off = iemNativeRegFlushPendingWrites(pReNative, off);
6252
6253#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6254 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6255#else
6256 RT_NOREF(idxInstr);
6257#endif
6258
6259#ifdef RT_ARCH_AMD64
6260 /*
6261 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6262 * return raisexcpt();
6263     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6264 */
6265 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6266
6267 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6268 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6269 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6270 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6271 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6272
6273 iemNativeRegFreeTmp(pReNative, iTmpReg);
6274
6275#elif defined(RT_ARCH_ARM64)
6276 /*
6277 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6278 * return raisexcpt();
6279 * ----
6280 * mov x1, 0x800000000000
6281 * add x1, x0, x1
6282 * cmp xzr, x1, lsr 48
6283 * b.ne .Lraisexcpt
6284 */
6285 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6286
6287 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6288 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6289 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6290 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6291
6292 iemNativeRegFreeTmp(pReNative, iTmpReg);
6293
6294#else
6295# error "Port me"
6296#endif
6297 return off;
6298}
6299
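/* Worked example for the canonical check above (illustrative only): adding 0x800000000000
   folds both canonical ranges into the low 48 bits, so only non-canonical addresses survive
   the shift by 48.  uAddr stands in for the value of @a idxAddrReg. */
#if 0
    /* 0x00007fffffffffff + 0x800000000000 = 0x0000ffffffffffff  -> >> 48 == 0, canonical     */
    /* 0xffff800000000000 + 0x800000000000 = 0x0000000000000000  -> >> 48 == 0, canonical     */
    /* 0x0000800000000000 + 0x800000000000 = 0x0001000000000000  -> >> 48 == 1, raise #GP(0)  */
    bool const fCanonical = ((uAddr + UINT64_C(0x800000000000)) >> 48) == 0;
#endif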
6300
6301/**
6302 * Emits code to check that the content of @a idxAddrReg is within the limit
6303 * of CS, raising a \#GP(0) if it isn't.
6304 *
6305 * @returns New code buffer offset; throws VBox status code on error.
6306 * @param pReNative The native recompile state.
6307 * @param off The code buffer offset.
6308 * @param idxAddrReg The host register (32-bit) with the address to
6309 * check.
6310 * @param idxInstr The current instruction.
6311 */
6312DECL_HIDDEN_THROW(uint32_t)
6313iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6314 uint8_t idxAddrReg, uint8_t idxInstr)
6315{
6316 /*
6317 * Make sure we don't have any outstanding guest register writes as we may
6318     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6319 */
6320 off = iemNativeRegFlushPendingWrites(pReNative, off);
6321
6322#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6323 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6324#else
6325 RT_NOREF(idxInstr);
6326#endif
6327
6328 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6329 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6330 kIemNativeGstRegUse_ReadOnly);
6331
6332 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6333 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6334
6335 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6336 return off;
6337}
6338
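/* Illustrative sketch, not part of the build: the runtime equivalent of the check above,
   with uAddr32 standing in for the value of @a idxAddrReg and uCsLimit for the CS segment
   limit shadow register.  The RaiseGp0 label path ends up in the helper named below. */
#if 0
    if (uAddr32 > uCsLimit)                 /* unsigned compare, emitted as 'ja RaiseGp0' */
        iemNativeHlpExecRaiseGp0(pVCpu);    /* see iemNativeEmitRaiseGp0() further down */
#endif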
6339
6340/**
6341 * Emits a call to a CImpl function or something similar.
6342 */
6343DECL_HIDDEN_THROW(uint32_t)
6344iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6345 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6346{
6347 /* Writeback everything. */
6348 off = iemNativeRegFlushPendingWrites(pReNative, off);
6349
6350 /*
6351     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6352 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6353 */
6354 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6355 fGstShwFlush
6356 | RT_BIT_64(kIemNativeGstReg_Pc)
6357 | RT_BIT_64(kIemNativeGstReg_EFlags));
6358 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6359
6360 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6361
6362 /*
6363 * Load the parameters.
6364 */
6365#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6366    /* Special case: the hidden VBOXSTRICTRC pointer occupies the first argument register. */
6367 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6368 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6369 if (cAddParams > 0)
6370 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6371 if (cAddParams > 1)
6372 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6373 if (cAddParams > 2)
6374 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6375 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6376
6377#else
6378 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6379 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6380 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6381 if (cAddParams > 0)
6382 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6383 if (cAddParams > 1)
6384 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6385 if (cAddParams > 2)
6386# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6387 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6388# else
6389 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6390# endif
6391#endif
6392
6393 /*
6394 * Make the call.
6395 */
6396 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6397
6398#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6399 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6400#endif
6401
6402 /*
6403 * Check the status code.
6404 */
6405 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6406}
6407
6408
6409/**
6410 * Emits a call to a threaded worker function.
6411 */
6412DECL_HIDDEN_THROW(uint32_t)
6413iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6414{
6415 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6416
6417 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6418 off = iemNativeRegFlushPendingWrites(pReNative, off);
6419
6420 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6421 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6422
6423#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6424 /* The threaded function may throw / long jmp, so set current instruction
6425 number if we're counting. */
6426 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6427#endif
6428
6429 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6430
6431#ifdef RT_ARCH_AMD64
6432 /* Load the parameters and emit the call. */
6433# ifdef RT_OS_WINDOWS
6434# ifndef VBOXSTRICTRC_STRICT_ENABLED
6435 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6436 if (cParams > 0)
6437 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6438 if (cParams > 1)
6439 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6440 if (cParams > 2)
6441 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6442# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6443 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6444 if (cParams > 0)
6445 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6446 if (cParams > 1)
6447 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6448 if (cParams > 2)
6449 {
6450 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6451 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6452 }
6453 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6454# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6455# else
6456 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6457 if (cParams > 0)
6458 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6459 if (cParams > 1)
6460 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6461 if (cParams > 2)
6462 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6463# endif
6464
6465 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6466
6467# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6468 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6469# endif
6470
6471#elif RT_ARCH_ARM64
6472 /*
6473 * ARM64:
6474 */
6475 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6476 if (cParams > 0)
6477 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6478 if (cParams > 1)
6479 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6480 if (cParams > 2)
6481 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6482
6483 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6484
6485#else
6486# error "port me"
6487#endif
6488
6489 /*
6490 * Check the status code.
6491 */
6492 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6493
6494 return off;
6495}
6496
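/* Conceptual sketch, not part of the build: the parameter loading above implies a threaded
   function taking pVCpu plus up to three uint64_t parameters, so the emitted code in effect
   performs the following before running the shared status/rcPassUp check: */
#if 0
    rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                                                                   pCallEntry->auParams[0],
                                                                   pCallEntry->auParams[1],
                                                                   pCallEntry->auParams[2]);
#endif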
6497#ifdef VBOX_WITH_STATISTICS
6498/**
6499 * Emits code to update the thread call statistics.
6500 */
6501DECL_INLINE_THROW(uint32_t)
6502iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6503{
6504 /*
6505 * Update threaded function stats.
6506 */
6507 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6508 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6509# if defined(RT_ARCH_ARM64)
6510 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6511 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6512 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6513 iemNativeRegFreeTmp(pReNative, idxTmp1);
6514 iemNativeRegFreeTmp(pReNative, idxTmp2);
6515# else
6516 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6517# endif
6518 return off;
6519}
6520#endif /* VBOX_WITH_STATISTICS */
6521
6522
6523/**
6524 * Emits the code at the CheckBranchMiss label.
6525 */
6526static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6527{
6528 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6529 if (idxLabel != UINT32_MAX)
6530 {
6531 iemNativeLabelDefine(pReNative, idxLabel, off);
6532
6533 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6534 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6535 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6536
6537 /* jump back to the return sequence. */
6538 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6539 }
6540 return off;
6541}
6542
6543
6544/**
6545 * Emits the code at the NeedCsLimChecking label.
6546 */
6547static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6548{
6549 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6550 if (idxLabel != UINT32_MAX)
6551 {
6552 iemNativeLabelDefine(pReNative, idxLabel, off);
6553
6554 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6555 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6556 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6557
6558 /* jump back to the return sequence. */
6559 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6560 }
6561 return off;
6562}
6563
6564
6565/**
6566 * Emits the code at the ObsoleteTb label.
6567 */
6568static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6569{
6570 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6571 if (idxLabel != UINT32_MAX)
6572 {
6573 iemNativeLabelDefine(pReNative, idxLabel, off);
6574
6575 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6576 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6577 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6578
6579 /* jump back to the return sequence. */
6580 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6581 }
6582 return off;
6583}
6584
6585
6586/**
6587 * Emits the code at the RaiseGP0 label.
6588 */
6589static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6590{
6591 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6592 if (idxLabel != UINT32_MAX)
6593 {
6594 iemNativeLabelDefine(pReNative, idxLabel, off);
6595
6596 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6597 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6598 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6599
6600 /* jump back to the return sequence. */
6601 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6602 }
6603 return off;
6604}
6605
6606
6607/**
6608 * Emits the code at the RaiseNm label.
6609 */
6610static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6611{
6612 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
6613 if (idxLabel != UINT32_MAX)
6614 {
6615 iemNativeLabelDefine(pReNative, idxLabel, off);
6616
6617 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
6618 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6619 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
6620
6621 /* jump back to the return sequence. */
6622 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6623 }
6624 return off;
6625}
6626
6627
6628/**
6629 * Emits the code at the RaiseUd label.
6630 */
6631static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6632{
6633 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
6634 if (idxLabel != UINT32_MAX)
6635 {
6636 iemNativeLabelDefine(pReNative, idxLabel, off);
6637
6638 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
6639 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6640 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
6641
6642 /* jump back to the return sequence. */
6643 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6644 }
6645 return off;
6646}
6647
6648
6649/**
6650 * Emits the code at the RaiseMf label.
6651 */
6652static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6653{
6654 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
6655 if (idxLabel != UINT32_MAX)
6656 {
6657 iemNativeLabelDefine(pReNative, idxLabel, off);
6658
6659 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
6660 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6661 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
6662
6663 /* jump back to the return sequence. */
6664 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6665 }
6666 return off;
6667}
6668
6669
6670/**
6671 * Emits the code at the RaiseXf label.
6672 */
6673static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6674{
6675 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
6676 if (idxLabel != UINT32_MAX)
6677 {
6678 iemNativeLabelDefine(pReNative, idxLabel, off);
6679
6680 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
6681 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6682 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
6683
6684 /* jump back to the return sequence. */
6685 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6686 }
6687 return off;
6688}
6689
6690
6691/**
6692 * Emits the code at the ReturnWithFlags label (returns
6693 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6694 */
6695static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6696{
6697 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6698 if (idxLabel != UINT32_MAX)
6699 {
6700 iemNativeLabelDefine(pReNative, idxLabel, off);
6701
6702 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6703
6704 /* jump back to the return sequence. */
6705 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6706 }
6707 return off;
6708}
6709
6710
6711/**
6712 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6713 */
6714static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6715{
6716 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6717 if (idxLabel != UINT32_MAX)
6718 {
6719 iemNativeLabelDefine(pReNative, idxLabel, off);
6720
6721 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6722
6723 /* jump back to the return sequence. */
6724 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6725 }
6726 return off;
6727}
6728
6729
6730/**
6731 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6732 */
6733static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6734{
6735 /*
6736 * Generate the rc + rcPassUp fiddling code if needed.
6737 */
6738 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6739 if (idxLabel != UINT32_MAX)
6740 {
6741 iemNativeLabelDefine(pReNative, idxLabel, off);
6742
6743 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6744#ifdef RT_ARCH_AMD64
6745# ifdef RT_OS_WINDOWS
6746# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6748# endif
6749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6750 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6751# else
6752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6753 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6754# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6756# endif
6757# endif
6758# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6759 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6760# endif
6761
6762#else
6763 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6764 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6765 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6766#endif
6767
6768 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6769 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6770 }
6771 return off;
6772}
6773
6774
6775/**
6776 * Emits a standard epilog.
6777 */
6778static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6779{
6780 *pidxReturnLabel = UINT32_MAX;
6781
6782 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6783 off = iemNativeRegFlushPendingWrites(pReNative, off);
6784
6785 /*
6786 * Successful return, so clear the return register (eax, w0).
6787 */
6788    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6789
6790 /*
6791 * Define label for common return point.
6792 */
6793 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6794 *pidxReturnLabel = idxReturn;
6795
6796 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6797
6798 /*
6799 * Restore registers and return.
6800 */
6801#ifdef RT_ARCH_AMD64
6802 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6803
6804    /* Reposition rsp at the r15 restore point. */
6805 pbCodeBuf[off++] = X86_OP_REX_W;
6806 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6807 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6808 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6809
6810 /* Pop non-volatile registers and return */
6811 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6812 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6813 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6814 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6815 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6816 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6817 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6818 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6819# ifdef RT_OS_WINDOWS
6820 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6821 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6822# endif
6823 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6824 pbCodeBuf[off++] = 0xc9; /* leave */
6825 pbCodeBuf[off++] = 0xc3; /* ret */
6826 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6827
6828#elif RT_ARCH_ARM64
6829 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6830
6831 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6832 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6833 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6834 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6835 IEMNATIVE_FRAME_VAR_SIZE / 8);
6836 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6837 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6838 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6839 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6840 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6841 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6842 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6843 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6844 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6845 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6846 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6847 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6848
6849 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6850 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6851 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6852 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6853
6854 /* retab / ret */
6855# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6856 if (1)
6857 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6858 else
6859# endif
6860 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6861
6862#else
6863# error "port me"
6864#endif
6865 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6866
6867 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6868}
6869
6870
6871/**
6872 * Emits a standard prolog.
6873 */
6874static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6875{
6876#ifdef RT_ARCH_AMD64
6877 /*
6878 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6879 * reserving 64 bytes for stack variables plus 4 non-register argument
6880     * slots.  Fixed register assignment: xBX = pVCpu;
6881 *
6882 * Since we always do the same register spilling, we can use the same
6883 * unwind description for all the code.
6884 */
6885 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6886 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6887 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6888 pbCodeBuf[off++] = 0x8b;
6889 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6890 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6891 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6892# ifdef RT_OS_WINDOWS
6893 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6894 pbCodeBuf[off++] = 0x8b;
6895 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6896 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6897 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6898# else
6899 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6900 pbCodeBuf[off++] = 0x8b;
6901 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6902# endif
6903 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6904 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6905 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6906 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6907 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6908 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6909 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6910 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6911
6912# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6913 /* Save the frame pointer. */
6914 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6915# endif
6916
6917 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6918 X86_GREG_xSP,
6919 IEMNATIVE_FRAME_ALIGN_SIZE
6920 + IEMNATIVE_FRAME_VAR_SIZE
6921 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6922 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6923 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6924 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6925 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6926
6927#elif RT_ARCH_ARM64
6928 /*
6929 * We set up a stack frame exactly like on x86, only we have to push the
6930     * return address ourselves here.  We save all non-volatile registers.
6931 */
6932 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6933
6934# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been unable
6935                      *        to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6936                      *        definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
6937                      *        in any way conditional, so we just emit this instruction now and hope for the best... */
6938 /* pacibsp */
6939 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6940# endif
6941
6942 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6943 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6944 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6945 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6946 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6947 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6948 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6949 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6950 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6951 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6952 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6953 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6954 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6955 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6956 /* Save the BP and LR (ret address) registers at the top of the frame. */
6957 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6958 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6959 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6960 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6961 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6962 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6963
6964 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6965 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6966
6967 /* mov r28, r0 */
6968 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6969 /* mov r27, r1 */
6970 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6971
6972# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6973 /* Save the frame pointer. */
6974 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6975 ARMV8_A64_REG_X2);
6976# endif
6977
6978#else
6979# error "port me"
6980#endif
6981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6982 return off;
6983}
6984
6985
6986/*********************************************************************************************************************************
6987* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6988*********************************************************************************************************************************/
6989
6990/**
6991 * Internal work that allocates a variable with kind set to
6992 * kIemNativeVarKind_Invalid and no current stack allocation.
6993 *
6994 * The kind will either be set by the caller or later when the variable is first
6995 * assigned a value.
6996 *
6997 * @returns Unpacked index.
6998 * @internal
6999 */
7000static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7001{
7002 Assert(cbType > 0 && cbType <= 64);
7003 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7004 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7005 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7006 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7007 pReNative->Core.aVars[idxVar].cbVar = cbType;
7008 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7009 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7010 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7011 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7012 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7013 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7014 pReNative->Core.aVars[idxVar].u.uValue = 0;
7015 return idxVar;
7016}
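
/* Worked example of the free-variable scan above (illustrative): with
   pReNative->Core.bmVars = 0x7, i.e. variables 0 thru 2 already allocated,
   ~bmVars has bit 3 as its least significant set bit, so ASMBitFirstSetU32
   returns the 1-based index 4 and the function hands out unpacked index 3. */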
7017
7018
7019/**
7020 * Internal work that allocates an argument variable w/o setting enmKind.
7021 *
7022 * @returns Unpacked index.
7023 * @internal
7024 */
7025static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7026{
7027 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7028 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7029 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7030
7031 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7032 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7033 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7034 return idxVar;
7035}
7036
7037
7038/**
7039 * Gets the stack slot for a stack variable, allocating one if necessary.
7040 *
7041 * Calling this function implies that the stack slot will contain a valid
7042 * variable value. The caller deals with any register currently assigned to the
7043 * variable, typically by spilling it into the stack slot.
7044 *
7045 * @returns The stack slot number.
7046 * @param pReNative The recompiler state.
7047 * @param idxVar The variable.
7048 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7049 */
7050DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7051{
7052 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7053 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7054 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7055
7056 /* Already got a slot? */
7057 uint8_t const idxStackSlot = pVar->idxStackSlot;
7058 if (idxStackSlot != UINT8_MAX)
7059 {
7060 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7061 return idxStackSlot;
7062 }
7063
7064 /*
7065 * A single slot is easy to allocate.
7066 * Allocate them from the top end, closest to BP, to reduce the displacement.
7067 */
7068 if (pVar->cbVar <= sizeof(uint64_t))
7069 {
7070 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7071 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7072 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7073 pVar->idxStackSlot = (uint8_t)iSlot;
7074 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7075 return (uint8_t)iSlot;
7076 }
7077
7078 /*
7079 * We need more than one stack slot.
7080 *
7081 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7082 */
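    /* Worked example (illustrative): for cbVar = 32, ASMBitLastSetU32(32) is 6, so
       fBitAlignMask = RT_BIT_32(6 - 4) - 1 = 3 and fBitAllocMask below becomes
       RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, i.e. four 8-byte slots aligned on a
       four-slot boundary. */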
7083 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7084 Assert(pVar->cbVar <= 64);
7085 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7086 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7087 uint32_t bmStack = ~pReNative->Core.bmStack;
7088 while (bmStack != UINT32_MAX)
7089 {
7090/** @todo allocate from the top to reduce BP displacement. */
7091 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7092 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7093 if (!(iSlot & fBitAlignMask))
7094 {
7095 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7096 {
7097 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7098 pVar->idxStackSlot = (uint8_t)iSlot;
7099 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7100 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7101 return (uint8_t)iSlot;
7102 }
7103 }
7104 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7105 }
7106 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7107}
7108
7109
7110/**
7111 * Changes the variable to a stack variable.
7112 *
7113 * Currently this is only possible to do the first time the variable is used;
7114 * switching later could be implemented but isn't done.
7115 *
7116 * @param pReNative The recompiler state.
7117 * @param idxVar The variable.
7118 * @throws VERR_IEM_VAR_IPE_2
7119 */
7120DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7121{
7122 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7123 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7124 if (pVar->enmKind != kIemNativeVarKind_Stack)
7125 {
7126 /* We could in theory transition from immediate to stack as well, but it
7127 would require the caller to store the value on the stack. So, until
7128 that's needed we only allow the transition from invalid. */
7129 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7130 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7131 pVar->enmKind = kIemNativeVarKind_Stack;
7132
7133 /* Note! We don't allocate a stack slot here, that's only done when a
7134 slot is actually needed to hold a variable value. */
7135 }
7136}
7137
7138
7139/**
7140 * Sets the variable to a constant (immediate) value.
7141 *
7142 * This does not require stack storage as we know the value and can always
7143 * reload it, unless of course it's referenced.
7144 *
7145 * @param pReNative The recompiler state.
7146 * @param idxVar The variable.
7147 * @param uValue The immediate value.
7148 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7149 */
7150DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7151{
7152 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7153 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7154 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7155 {
7156 /* Only simple transitions for now. */
7157 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7158 pVar->enmKind = kIemNativeVarKind_Immediate;
7159 }
7160 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7161
7162 pVar->u.uValue = uValue;
7163 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7164 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7165 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7166}
7167
7168
7169/**
7170 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7171 *
7172 * This does not require stack storage as we know the value and can always
7173 * reload it. Loading is postponed till needed.
7174 *
7175 * @param pReNative The recompiler state.
7176 * @param idxVar The variable. Unpacked.
7177 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7178 *
7179 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7180 * @internal
7181 */
7182static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7183{
7184 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7185 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7186
7187 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7188 {
7189 /* Only simple transitions for now. */
7190 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7191 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7192 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7193 }
7194 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7195
7196 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7197
7198 /* Update the other variable, ensure it's a stack variable. */
7199 /** @todo handle variables with const values... that'll go boom now. */
7200 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7201 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7202}
7203
7204
7205/**
7206 * Sets the variable to a reference (pointer) to a guest register reference.
7207 *
7208 * This does not require stack storage as we know the value and can always
7209 * reload it. Loading is postponed till needed.
7210 *
7211 * @param pReNative The recompiler state.
7212 * @param idxVar The variable.
7213 * @param enmRegClass The class of guest registers to reference.
7214 * @param idxReg The register within @a enmRegClass to reference.
7215 *
7216 * @throws VERR_IEM_VAR_IPE_2
7217 */
7218DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7219 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7220{
7221 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7222 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7223
7224 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7225 {
7226 /* Only simple transitions for now. */
7227 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7228 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7229 }
7230 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7231
7232 pVar->u.GstRegRef.enmClass = enmRegClass;
7233 pVar->u.GstRegRef.idx = idxReg;
7234}
7235
7236
7237DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7238{
7239 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7240}
7241
7242
7243DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7244{
7245 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7246
7247 /* Since we're using a generic uint64_t value type, we must truncate it if
7248 the variable is smaller, otherwise we may end up with too large a value when
7249 scaling up an imm8 w/ sign-extension.
7250 
7251 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7252 in the BIOS, bx=1) when running on arm, because clang expects 16-bit
7253 register parameters to have bits 16 and up set to zero. Instead of
7254 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7255 CF value in the result. */
7256 switch (cbType)
7257 {
7258 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7259 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7260 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7261 }
7262 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7263 return idxVar;
7264}
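
/* Usage sketch (illustrative only, argument number 1 picked arbitrarily), matching
   the "add bx, 0xffff" case described above:
       uint8_t const idxArg = iemNativeArgAllocConst(pReNative, 1 /*iArgNo*/, sizeof(uint16_t),
                                                     UINT64_C(0xffffffffffffffff));
   The truncation leaves the constant as 0xffff rather than the sign-extended
   64-bit value, so the 16-bit register argument gets bits 16 and up cleared. */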
7265
7266
7267DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7268{
7269 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7270 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7271 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7272 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7273 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7274 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7275
7276 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7277 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7278 return idxArgVar;
7279}
7280
7281
7282DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7283{
7284 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7285 /* Don't set it to stack now; leave that to the first use, as for instance
7286 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7287 return idxVar;
7288}
7289
7290
7291DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7292{
7293 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7294
7295 /* Since we're using a generic uint64_t value type, we must truncate it if
7296 the variable is smaller, otherwise we may end up with too large a value when
7297 scaling up an imm8 w/ sign-extension. */
7298 switch (cbType)
7299 {
7300 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7301 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7302 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7303 }
7304 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7305 return idxVar;
7306}
7307
7308
7309/**
7310 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7311 * fixed till we call iemNativeVarRegisterRelease.
7312 *
7313 * @returns The host register number.
7314 * @param pReNative The recompiler state.
7315 * @param idxVar The variable.
7316 * @param poff Pointer to the instruction buffer offset.
7317 * In case a register needs to be freed up or the value
7318 * loaded off the stack.
7319 * @param fInitialized Set if the variable must already have been initialized.
7320 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7321 * the case.
7322 * @param idxRegPref Preferred register number or UINT8_MAX.
7323 */
7324DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7325 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7326{
7327 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7328 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7329 Assert(pVar->cbVar <= 8);
7330 Assert(!pVar->fRegAcquired);
7331
7332 uint8_t idxReg = pVar->idxReg;
7333 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7334 {
7335 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7336 && pVar->enmKind < kIemNativeVarKind_End);
7337 pVar->fRegAcquired = true;
7338 return idxReg;
7339 }
7340
7341 /*
7342 * If the kind of variable has not yet been set, default to 'stack'.
7343 */
7344 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7345 && pVar->enmKind < kIemNativeVarKind_End);
7346 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7347 iemNativeVarSetKindToStack(pReNative, idxVar);
7348
7349 /*
7350 * We have to allocate a register for the variable, even if it's a stack one,
7351 * as we don't know whether there are modifications being made to it before it's
7352 * finalized (todo: analyze and insert hints about that?).
7353 *
7354 * If we can, we try to get the correct register for argument variables. This
7355 * assumes that most argument variables are fetched as close as possible
7356 * to the actual call, so that there aren't any interfering hidden calls
7357 * (memory accesses, etc) in between.
7358 *
7359 * If we cannot, or it's a local variable, we make sure no argument registers
7360 * that will be used by this MC block will be allocated here, and we always
7361 * prefer non-volatile registers to avoid needing to spill stuff for internal
7362 * calls.
7363 */
7364 /** @todo Detect too early argument value fetches and warn about hidden
7365 * calls causing less optimal code to be generated in the python script. */
7366
7367 uint8_t const uArgNo = pVar->uArgNo;
7368 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7369 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7370 {
7371 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7372 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7373 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7374 }
7375 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7376 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7377 {
7378 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7379 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7380 & ~pReNative->Core.bmHstRegsWithGstShadow
7381 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7382 & fNotArgsMask;
7383 if (fRegs)
7384 {
7385 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7386 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7387 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7388 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7389 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7390 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7391 }
7392 else
7393 {
7394 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7395 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7396 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7397 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7398 }
7399 }
7400 else
7401 {
7402 idxReg = idxRegPref;
7403 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7404 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7405 }
7406 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7407 pVar->idxReg = idxReg;
7408
7409 /*
7410 * Load it off the stack if we've got a stack slot.
7411 */
7412 uint8_t const idxStackSlot = pVar->idxStackSlot;
7413 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7414 {
7415 Assert(fInitialized);
7416 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7417 switch (pVar->cbVar)
7418 {
7419 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7420 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7421 case 3: AssertFailed(); RT_FALL_THRU();
7422 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7423 default: AssertFailed(); RT_FALL_THRU();
7424 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7425 }
7426 }
7427 else
7428 {
7429 Assert(idxStackSlot == UINT8_MAX);
7430 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7431 }
7432 pVar->fRegAcquired = true;
7433 return idxReg;
7434}
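
/* Typical usage sketch (illustrative only; iemNativeVarRegisterRelease is assumed
   to take the usual (pReNative, idxVar) form):
       uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
       ... emit code that reads/writes idxReg ...
       iemNativeVarRegisterRelease(pReNative, idxVar);
   The register stays pinned to the variable between the two calls. */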
7435
7436
7437/**
7438 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7439 * guest register.
7440 *
7441 * This function makes sure there is a register for it and sets it to be the
7442 * current shadow copy of @a enmGstReg.
7443 *
7444 * @returns The host register number.
7445 * @param pReNative The recompiler state.
7446 * @param idxVar The variable.
7447 * @param enmGstReg The guest register this variable will be written to
7448 * after this call.
7449 * @param poff Pointer to the instruction buffer offset.
7450 * In case a register needs to be freed up or if the
7451 * variable content needs to be loaded off the stack.
7452 *
7453 * @note We DO NOT expect @a idxVar to be an argument variable,
7454 * because this function is only used in the commit stage of an
7455 * instruction.
7456 */
7457DECL_HIDDEN_THROW(uint8_t)
7458iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7459{
7460 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7461 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7462 Assert(!pVar->fRegAcquired);
7463 AssertMsgStmt( pVar->cbVar <= 8
7464 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7465 || pVar->enmKind == kIemNativeVarKind_Stack),
7466 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7467 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7468 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7469
7470 /*
7471 * This shouldn't ever be used for arguments, unless it's in a weird else
7472 * branch that doesn't do any calling and even then it's questionable.
7473 *
7474 * However, in case someone writes crazy wrong MC code and does register
7475 * updates before making calls, just use the regular register allocator to
7476 * ensure we get a register suitable for the intended argument number.
7477 */
7478 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7479
7480 /*
7481 * If there is already a register for the variable, we transfer/set the
7482 * guest shadow copy assignment to it.
7483 */
7484 uint8_t idxReg = pVar->idxReg;
7485 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7486 {
7487 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7488 {
7489 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7490 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7491 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7492 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7493 }
7494 else
7495 {
7496 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7497 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7498 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7499 }
7500 /** @todo figure this one out. We need some way of making sure the register isn't
7501 * modified after this point, just in case we start writing crappy MC code. */
7502 pVar->enmGstReg = enmGstReg;
7503 pVar->fRegAcquired = true;
7504 return idxReg;
7505 }
7506 Assert(pVar->uArgNo == UINT8_MAX);
7507
7508 /*
7509 * Because this is supposed to be the commit stage, we just tag along with the
7510 * temporary register allocator and upgrade the register to a variable register.
7511 */
7512 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7513 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7514 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7515 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7516 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7517 pVar->idxReg = idxReg;
7518
7519 /*
7520 * Now we need to load the register value.
7521 */
7522 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7523 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7524 else
7525 {
7526 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7527 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7528 switch (pVar->cbVar)
7529 {
7530 case sizeof(uint64_t):
7531 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7532 break;
7533 case sizeof(uint32_t):
7534 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7535 break;
7536 case sizeof(uint16_t):
7537 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7538 break;
7539 case sizeof(uint8_t):
7540 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7541 break;
7542 default:
7543 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7544 }
7545 }
7546
7547 pVar->fRegAcquired = true;
7548 return idxReg;
7549}
7550
7551
7552/**
7553 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7554 *
7555 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7556 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7557 * requirement of flushing anything in volatile host registers when making a
7558 * call.
7559 *
7560 * @returns New @a off value.
7561 * @param pReNative The recompiler state.
7562 * @param off The code buffer position.
7563 * @param fHstRegsNotToSave Set of registers not to save & restore.
7564 */
7565DECL_HIDDEN_THROW(uint32_t)
7566iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7567{
7568 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7569 if (fHstRegs)
7570 {
7571 do
7572 {
7573 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7574 fHstRegs &= ~RT_BIT_32(idxHstReg);
7575
7576 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7577 {
7578 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7579 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7580 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7581 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7582 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7583 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7584 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7585 {
7586 case kIemNativeVarKind_Stack:
7587 {
7588 /* Temporarily spill the variable register. */
7589 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7590 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7591 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7592 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7593 continue;
7594 }
7595
7596 case kIemNativeVarKind_Immediate:
7597 case kIemNativeVarKind_VarRef:
7598 case kIemNativeVarKind_GstRegRef:
7599 /* It is weird to have any of these loaded at this point. */
7600 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7601 continue;
7602
7603 case kIemNativeVarKind_End:
7604 case kIemNativeVarKind_Invalid:
7605 break;
7606 }
7607 AssertFailed();
7608 }
7609 else
7610 {
7611 /*
7612 * Allocate a temporary stack slot and spill the register to it.
7613 */
7614 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7615 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7616 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7617 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7618 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7619 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7620 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7621 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7622 }
7623 } while (fHstRegs);
7624 }
7625 return off;
7626}
7627
7628
7629/**
7630 * Emit code to restore volatile registers after a call to a helper.
7631 *
7632 * @returns New @a off value.
7633 * @param pReNative The recompiler state.
7634 * @param off The code buffer position.
7635 * @param fHstRegsNotToSave Set of registers not to save & restore.
7636 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7637 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7638 */
7639DECL_HIDDEN_THROW(uint32_t)
7640iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7641{
7642 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7643 if (fHstRegs)
7644 {
7645 do
7646 {
7647 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7648 fHstRegs &= ~RT_BIT_32(idxHstReg);
7649
7650 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7651 {
7652 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7653 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7654 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7655 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7656 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7657 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7658 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7659 {
7660 case kIemNativeVarKind_Stack:
7661 {
7662 /* Unspill the variable register. */
7663 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7664 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7665 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7666 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7667 continue;
7668 }
7669
7670 case kIemNativeVarKind_Immediate:
7671 case kIemNativeVarKind_VarRef:
7672 case kIemNativeVarKind_GstRegRef:
7673 /* It is weird to have any of these loaded at this point. */
7674 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7675 continue;
7676
7677 case kIemNativeVarKind_End:
7678 case kIemNativeVarKind_Invalid:
7679 break;
7680 }
7681 AssertFailed();
7682 }
7683 else
7684 {
7685 /*
7686 * Restore from temporary stack slot.
7687 */
7688 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7689 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7690 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7691 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7692
7693 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7694 }
7695 } while (fHstRegs);
7696 }
7697 return off;
7698}
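
/* Pairing sketch (illustrative only) around a TLB-miss style helper call:
       off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
       ... emit the helper call ...
       off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
   The same fHstRegsNotToSave mask must be used for both halves so the save and
   restore cover the same set of registers. */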
7699
7700
7701/**
7702 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
7703 *
7704 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7705 *
7706 * ASSUMES that @a idxVar is valid and unpacked.
7707 */
7708DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7709{
7710 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7711 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7712 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7713 {
7714 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7715 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7716 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7717 Assert(cSlots > 0);
7718 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7719 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7720 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7721 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7722 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7723 }
7724 else
7725 Assert(idxStackSlot == UINT8_MAX);
7726}
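
/* Worked example (illustrative): for a 32 byte variable, cSlots = (32 + 7) / 8 = 4
   and fAllocMask = RT_BIT_32(4) - 1 = 0xf, so the four bits starting at
   idxStackSlot are cleared from bmStack. */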
7727
7728
7729/**
7730 * Worker that frees a single variable.
7731 *
7732 * ASSUMES that @a idxVar is valid and unpacked.
7733 */
7734DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7735{
7736 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7737 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7738 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7739
7740 /* Free the host register first if any assigned. */
7741 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7742 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7743 {
7744 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7745 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7746 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7747 }
7748
7749 /* Free argument mapping. */
7750 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7751 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7752 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7753
7754 /* Free the stack slots. */
7755 iemNativeVarFreeStackSlots(pReNative, idxVar);
7756
7757 /* Free the actual variable. */
7758 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7759 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7760}
7761
7762
7763/**
7764 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7765 */
7766DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7767{
7768 while (bmVars != 0)
7769 {
7770 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7771 bmVars &= ~RT_BIT_32(idxVar);
7772
7773#if 1 /** @todo optimize by simplifying this later... */
7774 iemNativeVarFreeOneWorker(pReNative, idxVar);
7775#else
7776 /* Only need to free the host register, the rest is done as bulk updates below. */
7777 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7778 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7779 {
7780 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7781 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7782 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7783 }
7784#endif
7785 }
7786#if 0 /** @todo optimize by simplifying this later... */
7787 pReNative->Core.bmVars = 0;
7788 pReNative->Core.bmStack = 0;
7789 pReNative->Core.u64ArgVars = UINT64_MAX;
7790#endif
7791}
7792
7793
7794
7795/*********************************************************************************************************************************
7796* Emitters for IEM_MC_CALL_CIMPL_XXX *
7797*********************************************************************************************************************************/
7798
7799/**
7800 * Emits code to load a reference to the given guest register into @a idxGprDst.
7801 */
7802DECL_INLINE_THROW(uint32_t)
7803iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7804 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7805{
7806#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7807 /** @todo If we're ever gonna allow referencing the RIP register, we need to update the guest value here. */
7808#endif
7809
7810 /*
7811 * Get the offset relative to the CPUMCTX structure.
7812 */
7813 uint32_t offCpumCtx;
7814 switch (enmClass)
7815 {
7816 case kIemNativeGstRegRef_Gpr:
7817 Assert(idxRegInClass < 16);
7818 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7819 break;
7820
7821 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
7822 Assert(idxRegInClass < 4);
7823 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7824 break;
7825
7826 case kIemNativeGstRegRef_EFlags:
7827 Assert(idxRegInClass == 0);
7828 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7829 break;
7830
7831 case kIemNativeGstRegRef_MxCsr:
7832 Assert(idxRegInClass == 0);
7833 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7834 break;
7835
7836 case kIemNativeGstRegRef_FpuReg:
7837 Assert(idxRegInClass < 8);
7838 AssertFailed(); /** @todo what kind of indexing? */
7839 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7840 break;
7841
7842 case kIemNativeGstRegRef_MReg:
7843 Assert(idxRegInClass < 8);
7844 AssertFailed(); /** @todo what kind of indexing? */
7845 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7846 break;
7847
7848 case kIemNativeGstRegRef_XReg:
7849 Assert(idxRegInClass < 16);
7850 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7851 break;
7852
7853 default:
7854 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7855 }
7856
7857 /*
7858 * Load the value into the destination register.
7859 */
7860#ifdef RT_ARCH_AMD64
7861 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7862
7863#elif defined(RT_ARCH_ARM64)
7864 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7865 Assert(offCpumCtx < 4096);
7866 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7867
7868#else
7869# error "Port me!"
7870#endif
7871
7872 return off;
7873}
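
/* Example (illustrative): for enmClass = kIemNativeGstRegRef_Gpr and
   idxRegInClass = 3 (RBX), offCpumCtx is RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]);
   on AMD64 this emits a LEA of pVCpu + cpum.GstCtx + offCpumCtx into idxGprDst,
   and on ARM64 an ADD of offCpumCtx to the fixed CPUMCTX pointer register. */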
7874
7875
7876/**
7877 * Common code for CIMPL and AIMPL calls.
7878 *
7879 * These are calls that use argument variables and such. They should not be
7880 * confused with internal calls required to implement an MC operation,
7881 * like a TLB load and similar.
7882 *
7883 * Upon return all that is left to do is to load any hidden arguments and
7884 * perform the call. All argument variables are freed.
7885 *
7886 * @returns New code buffer offset; throws VBox status code on error.
7887 * @param pReNative The native recompile state.
7888 * @param off The code buffer offset.
7889 * @param cArgs The total number of arguments (includes hidden
7890 * count).
7891 * @param cHiddenArgs The number of hidden arguments. The hidden
7892 * arguments must not have any variable declared for
7893 * them, whereas all the regular arguments must
7894 * (tstIEMCheckMc ensures this).
7895 */
7896DECL_HIDDEN_THROW(uint32_t)
7897iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7898{
7899#ifdef VBOX_STRICT
7900 /*
7901 * Assert sanity.
7902 */
7903 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7904 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7905 for (unsigned i = 0; i < cHiddenArgs; i++)
7906 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7907 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7908 {
7909 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7910 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7911 }
7912 iemNativeRegAssertSanity(pReNative);
7913#endif
7914
7915 /* We don't know what the called function makes use of, so flush any pending register writes. */
7916 off = iemNativeRegFlushPendingWrites(pReNative, off);
7917
7918 /*
7919 * Before we do anything else, go over variables that are referenced and
7920 * make sure they are not in a register.
7921 */
7922 uint32_t bmVars = pReNative->Core.bmVars;
7923 if (bmVars)
7924 {
7925 do
7926 {
7927 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7928 bmVars &= ~RT_BIT_32(idxVar);
7929
7930 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7931 {
7932 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7933 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7934 {
7935 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7936 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7937 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7938 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7939 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7940
7941 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7942 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7943 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7944 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7945 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7946 }
7947 }
7948 } while (bmVars != 0);
7949#if 0 //def VBOX_STRICT
7950 iemNativeRegAssertSanity(pReNative);
7951#endif
7952 }
7953
7954 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7955
7956 /*
7957 * First, go over the host registers that will be used for arguments and make
7958 * sure they either hold the desired argument or are free.
7959 */
7960 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7961 {
7962 for (uint32_t i = 0; i < cRegArgs; i++)
7963 {
7964 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7965 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7966 {
7967 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7968 {
7969 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7970 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7971 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7972 Assert(pVar->idxReg == idxArgReg);
7973 uint8_t const uArgNo = pVar->uArgNo;
7974 if (uArgNo == i)
7975 { /* perfect */ }
7976 /* The variable allocator logic should make sure this is impossible,
7977 except for when the return register is used as a parameter (ARM,
7978 but not x86). */
7979#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7980 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7981 {
7982# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7983# error "Implement this"
7984# endif
7985 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7986 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7987 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7988 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7989 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7990 }
7991#endif
7992 else
7993 {
7994 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7995
7996 if (pVar->enmKind == kIemNativeVarKind_Stack)
7997 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7998 else
7999 {
8000 /* just free it, can be reloaded if used again */
8001 pVar->idxReg = UINT8_MAX;
8002 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8003 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8004 }
8005 }
8006 }
8007 else
8008 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8009 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8010 }
8011 }
8012#if 0 //def VBOX_STRICT
8013 iemNativeRegAssertSanity(pReNative);
8014#endif
8015 }
8016
8017 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8018
8019#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8020 /*
8021 * If there are any stack arguments, make sure they are in their place as well.
8022 *
8023 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8024 * the caller) will be loading it later and it must be free (see the first loop).
8025 */
8026 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8027 {
8028 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8029 {
8030 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8031 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8032 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8033 {
8034 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8035 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8036 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8037 pVar->idxReg = UINT8_MAX;
8038 }
8039 else
8040 {
8041 /* Use ARG0 as temp for stuff we need registers for. */
8042 switch (pVar->enmKind)
8043 {
8044 case kIemNativeVarKind_Stack:
8045 {
8046 uint8_t const idxStackSlot = pVar->idxStackSlot;
8047 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8048 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8049 iemNativeStackCalcBpDisp(idxStackSlot));
8050 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8051 continue;
8052 }
8053
8054 case kIemNativeVarKind_Immediate:
8055 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8056 continue;
8057
8058 case kIemNativeVarKind_VarRef:
8059 {
8060 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8061 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8062 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8063 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8064 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8065 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8066 {
8067 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8068 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8069 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8070 }
8071 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8072 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8073 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8074 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8075 continue;
8076 }
8077
8078 case kIemNativeVarKind_GstRegRef:
8079 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8080 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8081 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8082 continue;
8083
8084 case kIemNativeVarKind_Invalid:
8085 case kIemNativeVarKind_End:
8086 break;
8087 }
8088 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8089 }
8090 }
8091# if 0 //def VBOX_STRICT
8092 iemNativeRegAssertSanity(pReNative);
8093# endif
8094 }
8095#else
8096 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8097#endif
8098
8099 /*
8100 * Make sure the argument variables are loaded into their respective registers.
8101 *
8102 * We can optimize this by ASSUMING that any register allocations are for
8103 * registers that have already been loaded and are ready. The previous step
8104 * saw to that.
8105 */
8106 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8107 {
8108 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8109 {
8110 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8111 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8112 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8113 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8114 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8115 else
8116 {
8117 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8118 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8119 {
8120 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8121 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8122 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8123 | RT_BIT_32(idxArgReg);
8124 pVar->idxReg = idxArgReg;
8125 }
8126 else
8127 {
8128 /* Use ARG0 as temp for stuff we need registers for. */
8129 switch (pVar->enmKind)
8130 {
8131 case kIemNativeVarKind_Stack:
8132 {
8133 uint8_t const idxStackSlot = pVar->idxStackSlot;
8134 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8135 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8136 continue;
8137 }
8138
8139 case kIemNativeVarKind_Immediate:
8140 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8141 continue;
8142
8143 case kIemNativeVarKind_VarRef:
8144 {
8145 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8146 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8147 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8148 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8149 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8150 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8151 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8152 {
8153 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8154 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8155 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8156 }
8157 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8158 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8159 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8160 continue;
8161 }
8162
8163 case kIemNativeVarKind_GstRegRef:
8164 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8165 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8166 continue;
8167
8168 case kIemNativeVarKind_Invalid:
8169 case kIemNativeVarKind_End:
8170 break;
8171 }
8172 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8173 }
8174 }
8175 }
8176#if 0 //def VBOX_STRICT
8177 iemNativeRegAssertSanity(pReNative);
8178#endif
8179 }
8180#ifdef VBOX_STRICT
8181 else
8182 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8183 {
8184 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8185 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8186 }
8187#endif
8188
8189 /*
8190 * Free all argument variables (simplified).
8191 * Their lifetime always expires with the call they are for.
8192 */
8193 /** @todo Make the python script check that arguments aren't used after
8194 * IEM_MC_CALL_XXXX. */
8195 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8196 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8197 * an argument value. There is also some FPU stuff. */
8198 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8199 {
8200 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8201 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8202
8203 /* no need to free registers: */
8204 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8205 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8206 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8207 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8208 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8209 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8210
8211 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8212 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8213 iemNativeVarFreeStackSlots(pReNative, idxVar);
8214 }
8215 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8216
8217 /*
8218 * Flush volatile registers as we make the call.
8219 */
8220 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8221
8222 return off;
8223}
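
/* Call-site sketch (illustrative only; cArgs here counts only the explicit MC
   arguments): an emitter for IEM_MC_CALL_XXX would do
       off = iemNativeEmitCallCommon(pReNative, off, cArgs + cHiddenArgs, cHiddenArgs);
       ... load the hidden argument register(s) ...
       ... emit the actual call ...
   since, as noted above, all argument variables are freed by the time this returns. */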
8224
8225
8226
8227/*********************************************************************************************************************************
8228* TLB Lookup. *
8229*********************************************************************************************************************************/
8230
8231/**
8232 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8233 */
8234DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8235{
8236 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8237 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8238 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8239 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8240
8241 /* Do the lookup manually. */
8242 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8243 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8244 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8245 if (RT_LIKELY(pTlbe->uTag == uTag))
8246 {
8247 /*
8248 * Check TLB page table level access flags.
8249 */
8250 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8251 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8252 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8253 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8254 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8255 | IEMTLBE_F_PG_UNASSIGNED
8256 | IEMTLBE_F_PT_NO_ACCESSED
8257 | fNoWriteNoDirty | fNoUser);
8258 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8259 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8260 {
8261 /*
8262 * Return the address.
8263 */
8264 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8265 if ((uintptr_t)pbAddr == uResult)
8266 return;
8267 RT_NOREF(cbMem);
8268 AssertFailed();
8269 }
8270 else
8271 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8272 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8273 }
8274 else
8275 AssertFailed();
8276 RT_BREAKPOINT();
8277}
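
/* The uSegAndSizeAndAccess parameter is packed as (illustrative, assuming fAccess
   fits in 16 bits as the decoding above implies):
       uSegAndSizeAndAccess = (uint32_t)iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16);
   with iSegReg = UINT8_MAX for flat (already segmented) addresses. */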
8278
8279/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8280
8281
8282
8283/*********************************************************************************************************************************
8284* Recompiler Core. *
8285*********************************************************************************************************************************/
8286
8287/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8288static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8289{
8290 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8291 pDis->cbCachedInstr += cbMaxRead;
8292 RT_NOREF(cbMinRead);
8293 return VERR_NO_DATA;
8294}
8295
8296
8297DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8298{
8299 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8300 {
8301#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8302 ENTRY(fLocalForcedActions),
8303 ENTRY(iem.s.rcPassUp),
8304 ENTRY(iem.s.fExec),
8305 ENTRY(iem.s.pbInstrBuf),
8306 ENTRY(iem.s.uInstrBufPc),
8307 ENTRY(iem.s.GCPhysInstrBuf),
8308 ENTRY(iem.s.cbInstrBufTotal),
8309 ENTRY(iem.s.idxTbCurInstr),
8310#ifdef VBOX_WITH_STATISTICS
8311 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8312 ENTRY(iem.s.StatNativeTlbHitsForStore),
8313 ENTRY(iem.s.StatNativeTlbHitsForStack),
8314 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8315 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8316 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8317 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8318 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8319#endif
8320 ENTRY(iem.s.DataTlb.aEntries),
8321 ENTRY(iem.s.DataTlb.uTlbRevision),
8322 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8323 ENTRY(iem.s.DataTlb.cTlbHits),
8324 ENTRY(iem.s.CodeTlb.aEntries),
8325 ENTRY(iem.s.CodeTlb.uTlbRevision),
8326 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8327 ENTRY(iem.s.CodeTlb.cTlbHits),
8328 ENTRY(pVMR3),
8329 ENTRY(cpum.GstCtx.rax),
8330 ENTRY(cpum.GstCtx.ah),
8331 ENTRY(cpum.GstCtx.rcx),
8332 ENTRY(cpum.GstCtx.ch),
8333 ENTRY(cpum.GstCtx.rdx),
8334 ENTRY(cpum.GstCtx.dh),
8335 ENTRY(cpum.GstCtx.rbx),
8336 ENTRY(cpum.GstCtx.bh),
8337 ENTRY(cpum.GstCtx.rsp),
8338 ENTRY(cpum.GstCtx.rbp),
8339 ENTRY(cpum.GstCtx.rsi),
8340 ENTRY(cpum.GstCtx.rdi),
8341 ENTRY(cpum.GstCtx.r8),
8342 ENTRY(cpum.GstCtx.r9),
8343 ENTRY(cpum.GstCtx.r10),
8344 ENTRY(cpum.GstCtx.r11),
8345 ENTRY(cpum.GstCtx.r12),
8346 ENTRY(cpum.GstCtx.r13),
8347 ENTRY(cpum.GstCtx.r14),
8348 ENTRY(cpum.GstCtx.r15),
8349 ENTRY(cpum.GstCtx.es.Sel),
8350 ENTRY(cpum.GstCtx.es.u64Base),
8351 ENTRY(cpum.GstCtx.es.u32Limit),
8352 ENTRY(cpum.GstCtx.es.Attr),
8353 ENTRY(cpum.GstCtx.cs.Sel),
8354 ENTRY(cpum.GstCtx.cs.u64Base),
8355 ENTRY(cpum.GstCtx.cs.u32Limit),
8356 ENTRY(cpum.GstCtx.cs.Attr),
8357 ENTRY(cpum.GstCtx.ss.Sel),
8358 ENTRY(cpum.GstCtx.ss.u64Base),
8359 ENTRY(cpum.GstCtx.ss.u32Limit),
8360 ENTRY(cpum.GstCtx.ss.Attr),
8361 ENTRY(cpum.GstCtx.ds.Sel),
8362 ENTRY(cpum.GstCtx.ds.u64Base),
8363 ENTRY(cpum.GstCtx.ds.u32Limit),
8364 ENTRY(cpum.GstCtx.ds.Attr),
8365 ENTRY(cpum.GstCtx.fs.Sel),
8366 ENTRY(cpum.GstCtx.fs.u64Base),
8367 ENTRY(cpum.GstCtx.fs.u32Limit),
8368 ENTRY(cpum.GstCtx.fs.Attr),
8369 ENTRY(cpum.GstCtx.gs.Sel),
8370 ENTRY(cpum.GstCtx.gs.u64Base),
8371 ENTRY(cpum.GstCtx.gs.u32Limit),
8372 ENTRY(cpum.GstCtx.gs.Attr),
8373 ENTRY(cpum.GstCtx.rip),
8374 ENTRY(cpum.GstCtx.eflags),
8375 ENTRY(cpum.GstCtx.uRipInhibitInt),
8376#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8377 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8378 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8379 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8380 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8381 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8382 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8383 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8384 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8385 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8386 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8387 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8388 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8389 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8390 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8391 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8392 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8393 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8394 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8395 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8396 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8397 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8398 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8399 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8400 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8401 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8402 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8403 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8404 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8405 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8406 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8407 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8408 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8409#endif
8410#undef ENTRY
8411 };
8412#ifdef VBOX_STRICT
8413 static bool s_fOrderChecked = false;
8414 if (!s_fOrderChecked)
8415 {
8416 s_fOrderChecked = true;
8417 uint32_t offPrev = s_aMembers[0].off;
8418 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8419 {
8420 Assert(s_aMembers[i].off > offPrev);
8421 offPrev = s_aMembers[i].off;
8422 }
8423 }
8424#endif
8425
8426 /*
8427 * Binary lookup.
8428 */
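    /* Note: this relies on s_aMembers being sorted by ascending offset; strict
       builds verify the ordering above. */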
8429 unsigned iStart = 0;
8430 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8431 for (;;)
8432 {
8433 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8434 uint32_t const offCur = s_aMembers[iCur].off;
8435 if (off < offCur)
8436 {
8437 if (iCur != iStart)
8438 iEnd = iCur;
8439 else
8440 break;
8441 }
8442 else if (off > offCur)
8443 {
8444 if (iCur + 1 < iEnd)
8445 iStart = iCur + 1;
8446 else
8447 break;
8448 }
8449 else
8450 return s_aMembers[iCur].pszName;
8451 }
8452#ifdef VBOX_WITH_STATISTICS
8453 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8454 return "iem.s.acThreadedFuncStats[iFn]";
8455#endif
8456 return NULL;
8457}
8458
8459
8460/**
8461 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
8462 * @returns pszBuf.
8463 * @param fFlags The flags.
8464 * @param pszBuf The output buffer.
8465 * @param cbBuf The output buffer size. At least 32 bytes.
8466 */
8467DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
8468{
8469 Assert(cbBuf >= 32);
8470 static RTSTRTUPLE const s_aModes[] =
8471 {
8472 /* [00] = */ { RT_STR_TUPLE("16BIT") },
8473 /* [01] = */ { RT_STR_TUPLE("32BIT") },
8474 /* [02] = */ { RT_STR_TUPLE("!2!") },
8475 /* [03] = */ { RT_STR_TUPLE("!3!") },
8476 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
8477 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
8478 /* [06] = */ { RT_STR_TUPLE("!6!") },
8479 /* [07] = */ { RT_STR_TUPLE("!7!") },
8480 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
8481 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
8482 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
8483 /* [0b] = */ { RT_STR_TUPLE("!b!") },
8484 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
8485 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
8486 /* [0e] = */ { RT_STR_TUPLE("!e!") },
8487 /* [0f] = */ { RT_STR_TUPLE("!f!") },
8488 /* [10] = */ { RT_STR_TUPLE("!10!") },
8489 /* [11] = */ { RT_STR_TUPLE("!11!") },
8490 /* [12] = */ { RT_STR_TUPLE("!12!") },
8491 /* [13] = */ { RT_STR_TUPLE("!13!") },
8492 /* [14] = */ { RT_STR_TUPLE("!14!") },
8493 /* [15] = */ { RT_STR_TUPLE("!15!") },
8494 /* [16] = */ { RT_STR_TUPLE("!16!") },
8495 /* [17] = */ { RT_STR_TUPLE("!17!") },
8496 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
8497 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
8498 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
8499 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
8500 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
8501 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
8502 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
8503 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
8504 };
8505 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
8506 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
8507 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
8508
8509 pszBuf[off++] = ' ';
8510 pszBuf[off++] = 'C';
8511 pszBuf[off++] = 'P';
8512 pszBuf[off++] = 'L';
8513 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
8514 Assert(off < 32);
8515
8516 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
8517
8518 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
8519 {
8520 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
8521 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
8522 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
8523 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
8524 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
8525 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
8526 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
8527 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
8528 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
8529 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
8530 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
8531 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
8532 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
8533 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
8534 };
8535 if (fFlags)
8536 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
8537 if (s_aFlags[i].fFlag & fFlags)
8538 {
8539 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
8540 pszBuf[off++] = ' ';
8541 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
8542 off += s_aFlags[i].cchName;
8543 fFlags &= ~s_aFlags[i].fFlag;
8544 if (!fFlags)
8545 break;
8546 }
8547 pszBuf[off] = '\0';
8548
8549 return pszBuf;
8550}
8551
8552
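/**
 * Disassembles a native translation block to the given output helper,
 * interleaving guest instructions, threaded calls, register shadowing notes
 * and labels when TB debug info is available.
 */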
8553DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8554{
8555 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8556#if defined(RT_ARCH_AMD64)
8557 static const char * const a_apszMarkers[] =
8558 {
8559 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8560 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8561 };
8562#endif
8563
8564 char szDisBuf[512];
8565 DISSTATE Dis;
8566 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8567 uint32_t const cNative = pTb->Native.cInstructions;
8568 uint32_t offNative = 0;
8569#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8570 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8571#endif
8572 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8573 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8574 : DISCPUMODE_64BIT;
8575#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8576 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8577#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8578 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8579#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8580# error "Port me"
8581#else
8582 csh hDisasm = ~(size_t)0;
8583# if defined(RT_ARCH_AMD64)
8584 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8585# elif defined(RT_ARCH_ARM64)
8586 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8587# else
8588# error "Port me"
8589# endif
8590 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8591
8592 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8593 //Assert(rcCs == CS_ERR_OK);
8594#endif
8595
8596 /*
8597 * Print TB info.
8598 */
8599 pHlp->pfnPrintf(pHlp,
8600 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8601 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8602 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8603 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8604#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8605 if (pDbgInfo && pDbgInfo->cEntries > 1)
8606 {
8607 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8608
8609 /*
8610             * This disassembly is driven by the debug info which follows the native
8611             * code and indicates where the next guest instruction starts, where
8612             * labels are, and so on.
8613 */
8614 uint32_t idxThreadedCall = 0;
8615 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8616 uint8_t idxRange = UINT8_MAX;
8617 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8618 uint32_t offRange = 0;
8619 uint32_t offOpcodes = 0;
8620 uint32_t const cbOpcodes = pTb->cbOpcodes;
8621 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8622 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8623 uint32_t iDbgEntry = 1;
8624 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8625
8626 while (offNative < cNative)
8627 {
8628 /* If we're at or have passed the point where the next chunk of debug
8629 info starts, process it. */
8630 if (offDbgNativeNext <= offNative)
8631 {
8632 offDbgNativeNext = UINT32_MAX;
8633 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8634 {
8635 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8636 {
8637 case kIemTbDbgEntryType_GuestInstruction:
8638 {
8639 /* Did the exec flag change? */
8640 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8641 {
8642 pHlp->pfnPrintf(pHlp,
8643 " fExec change %#08x -> %#08x %s\n",
8644 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8645 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8646 szDisBuf, sizeof(szDisBuf)));
8647 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8648 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8649 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8650 : DISCPUMODE_64BIT;
8651 }
8652
8653                            /* New opcode range? We need to fend off a spurious debug info entry here for cases
8654 where the compilation was aborted before the opcode was recorded and the actual
8655 instruction was translated to a threaded call. This may happen when we run out
8656 of ranges, or when some complicated interrupts/FFs are found to be pending or
8657 similar. So, we just deal with it here rather than in the compiler code as it
8658 is a lot simpler to do here. */
8659 if ( idxRange == UINT8_MAX
8660 || idxRange >= cRanges
8661 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8662 {
8663 idxRange += 1;
8664 if (idxRange < cRanges)
8665 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8666 else
8667 continue;
8668 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8669 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8670 + (pTb->aRanges[idxRange].idxPhysPage == 0
8671 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8672 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8673 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8674 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8675 pTb->aRanges[idxRange].idxPhysPage);
8676 GCPhysPc += offRange;
8677 }
8678
8679 /* Disassemble the instruction. */
8680 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8681 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8682 uint32_t cbInstr = 1;
8683 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8684 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8685 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8686 if (RT_SUCCESS(rc))
8687 {
8688 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8689 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8690 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8691 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8692
8693 static unsigned const s_offMarker = 55;
8694 static char const s_szMarker[] = " ; <--- guest";
8695 if (cch < s_offMarker)
8696 {
8697 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8698 cch = s_offMarker;
8699 }
8700 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8701 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8702
8703 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8704 }
8705 else
8706 {
8707 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8708 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8709 cbInstr = 1;
8710 }
8711 GCPhysPc += cbInstr;
8712 offOpcodes += cbInstr;
8713 offRange += cbInstr;
8714 continue;
8715 }
8716
8717 case kIemTbDbgEntryType_ThreadedCall:
8718 pHlp->pfnPrintf(pHlp,
8719 " Call #%u to %s (%u args) - %s\n",
8720 idxThreadedCall,
8721 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8722 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8723 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8724 idxThreadedCall++;
8725 continue;
8726
8727 case kIemTbDbgEntryType_GuestRegShadowing:
8728 {
8729 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8730 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8731 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8732 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8733 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8734 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8735 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8736 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8737 else
8738 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8739 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8740 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8741 continue;
8742 }
8743
8744#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8745 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8746 {
8747 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8748 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8749 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8750 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8751 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8752 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8753 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8754 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8755 else
8756 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8757 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8758 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8759 continue;
8760 }
8761#endif
8762
8763 case kIemTbDbgEntryType_Label:
8764 {
8765 const char *pszName = "what_the_fudge";
8766 const char *pszComment = "";
8767 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8768 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8769 {
8770 case kIemNativeLabelType_Return:
8771 pszName = "Return";
8772 break;
8773 case kIemNativeLabelType_ReturnBreak:
8774 pszName = "ReturnBreak";
8775 break;
8776 case kIemNativeLabelType_ReturnWithFlags:
8777 pszName = "ReturnWithFlags";
8778 break;
8779 case kIemNativeLabelType_NonZeroRetOrPassUp:
8780 pszName = "NonZeroRetOrPassUp";
8781 break;
8782 case kIemNativeLabelType_RaiseGp0:
8783 pszName = "RaiseGp0";
8784 break;
8785 case kIemNativeLabelType_RaiseNm:
8786 pszName = "RaiseNm";
8787 break;
8788 case kIemNativeLabelType_RaiseUd:
8789 pszName = "RaiseUd";
8790 break;
8791 case kIemNativeLabelType_RaiseMf:
8792 pszName = "RaiseMf";
8793 break;
8794 case kIemNativeLabelType_RaiseXf:
8795 pszName = "RaiseXf";
8796 break;
8797 case kIemNativeLabelType_ObsoleteTb:
8798 pszName = "ObsoleteTb";
8799 break;
8800 case kIemNativeLabelType_NeedCsLimChecking:
8801 pszName = "NeedCsLimChecking";
8802 break;
8803 case kIemNativeLabelType_CheckBranchMiss:
8804 pszName = "CheckBranchMiss";
8805 break;
8806 case kIemNativeLabelType_If:
8807 pszName = "If";
8808 fNumbered = true;
8809 break;
8810 case kIemNativeLabelType_Else:
8811 pszName = "Else";
8812 fNumbered = true;
8813 pszComment = " ; regs state restored pre-if-block";
8814 break;
8815 case kIemNativeLabelType_Endif:
8816 pszName = "Endif";
8817 fNumbered = true;
8818 break;
8819 case kIemNativeLabelType_CheckIrq:
8820 pszName = "CheckIrq_CheckVM";
8821 fNumbered = true;
8822 break;
8823 case kIemNativeLabelType_TlbLookup:
8824 pszName = "TlbLookup";
8825 fNumbered = true;
8826 break;
8827 case kIemNativeLabelType_TlbMiss:
8828 pszName = "TlbMiss";
8829 fNumbered = true;
8830 break;
8831 case kIemNativeLabelType_TlbDone:
8832 pszName = "TlbDone";
8833 fNumbered = true;
8834 break;
8835 case kIemNativeLabelType_Invalid:
8836 case kIemNativeLabelType_End:
8837 break;
8838 }
8839 if (fNumbered)
8840 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8841 else
8842 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8843 continue;
8844 }
8845
8846 case kIemTbDbgEntryType_NativeOffset:
8847 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8848 Assert(offDbgNativeNext > offNative);
8849 break;
8850
8851#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8852 case kIemTbDbgEntryType_DelayedPcUpdate:
8853 pHlp->pfnPrintf(pHlp,
8854 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8855 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8856 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8857 continue;
8858#endif
8859
8860 default:
8861 AssertFailed();
8862 }
8863 iDbgEntry++;
8864 break;
8865 }
8866 }
8867
8868 /*
8869 * Disassemble the next native instruction.
8870 */
8871 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8872# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8873 uint32_t cbInstr = sizeof(paNative[0]);
8874 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8875 if (RT_SUCCESS(rc))
8876 {
8877# if defined(RT_ARCH_AMD64)
8878 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8879 {
8880 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
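                    /* uInfo layout (see the iemNativeEmitMarker call in iemNativeRecompile):
                       low word = call index with bit 15 set for recompiled calls, high word
                       = threaded function number; other values are treated here as plain
                       markers indexing a_apszMarkers (bit 31 masked off). */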
8881 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8882 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8883 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8884 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8885 uInfo & 0x8000 ? "recompiled" : "todo");
8886 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8887 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8888 else
8889 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8890 }
8891 else
8892# endif
8893 {
8894 const char *pszAnnotation = NULL;
8895# ifdef RT_ARCH_AMD64
8896 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8897 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8898 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8899 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
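                    /* Annotate memory operands that are addressed relative to the fixed
                       pVCpu register with the corresponding VMCPU member name. */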
8900 PCDISOPPARAM pMemOp;
8901 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8902 pMemOp = &Dis.Param1;
8903 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8904 pMemOp = &Dis.Param2;
8905 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8906 pMemOp = &Dis.Param3;
8907 else
8908 pMemOp = NULL;
8909 if ( pMemOp
8910 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8911 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8912 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8913 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8914
8915#elif defined(RT_ARCH_ARM64)
8916 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8917 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8918 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8919# else
8920# error "Port me"
8921# endif
8922 if (pszAnnotation)
8923 {
8924 static unsigned const s_offAnnotation = 55;
8925 size_t const cchAnnotation = strlen(pszAnnotation);
8926 size_t cchDis = strlen(szDisBuf);
8927 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8928 {
8929 if (cchDis < s_offAnnotation)
8930 {
8931 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8932 cchDis = s_offAnnotation;
8933 }
8934 szDisBuf[cchDis++] = ' ';
8935 szDisBuf[cchDis++] = ';';
8936 szDisBuf[cchDis++] = ' ';
8937 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8938 }
8939 }
8940 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8941 }
8942 }
8943 else
8944 {
8945# if defined(RT_ARCH_AMD64)
8946 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8947 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8948# elif defined(RT_ARCH_ARM64)
8949 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8950# else
8951# error "Port me"
8952# endif
8953 cbInstr = sizeof(paNative[0]);
8954 }
8955 offNative += cbInstr / sizeof(paNative[0]);
8956
8957# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8958 cs_insn *pInstr;
8959 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8960 (uintptr_t)pNativeCur, 1, &pInstr);
8961 if (cInstrs > 0)
8962 {
8963 Assert(cInstrs == 1);
8964 const char *pszAnnotation = NULL;
8965# if defined(RT_ARCH_ARM64)
8966 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8967 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8968 {
8969                    /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8970 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
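                    /* x28 holds pVCpu and x27 holds &pVCpu->cpum.GstCtx (see the AssertCompile
                       above), so [x27/x28 + disp] can be mapped back to a VMCPU member name. */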
8971 char *psz = strchr(pInstr->op_str, '[');
8972 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8973 {
8974                        uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8975 int32_t off = -1;
8976 psz += 4;
8977 if (*psz == ']')
8978 off = 0;
8979 else if (*psz == ',')
8980 {
8981 psz = RTStrStripL(psz + 1);
8982 if (*psz == '#')
8983 off = RTStrToInt32(&psz[1]);
8984 /** @todo deal with index registers and LSL as well... */
8985 }
8986 if (off >= 0)
8987 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8988 }
8989 }
8990# endif
8991
8992 size_t const cchOp = strlen(pInstr->op_str);
8993# if defined(RT_ARCH_AMD64)
8994 if (pszAnnotation)
8995 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8996 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8997 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8998 else
8999 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9000 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9001
9002# else
9003 if (pszAnnotation)
9004 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9005 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9006 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9007 else
9008 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9009 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9010# endif
9011 offNative += pInstr->size / sizeof(*pNativeCur);
9012 cs_free(pInstr, cInstrs);
9013 }
9014 else
9015 {
9016# if defined(RT_ARCH_AMD64)
9017 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9018                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9019# else
9020 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9021# endif
9022 offNative++;
9023 }
9024# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9025 }
9026 }
9027 else
9028#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9029 {
9030 /*
9031 * No debug info, just disassemble the x86 code and then the native code.
9032 *
9033 * First the guest code:
9034 */
9035 for (unsigned i = 0; i < pTb->cRanges; i++)
9036 {
9037 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9038 + (pTb->aRanges[i].idxPhysPage == 0
9039 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9040 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9041 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9042 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9043 unsigned off = pTb->aRanges[i].offOpcodes;
9044 /** @todo this ain't working when crossing pages! */
9045 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9046 while (off < cbOpcodes)
9047 {
9048 uint32_t cbInstr = 1;
9049 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9050 &pTb->pabOpcodes[off], cbOpcodes - off,
9051 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9052 if (RT_SUCCESS(rc))
9053 {
9054 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9055 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9056 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9057 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9058 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9059 GCPhysPc += cbInstr;
9060 off += cbInstr;
9061 }
9062 else
9063 {
9064 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9065 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9066 break;
9067 }
9068 }
9069 }
9070
9071 /*
9072 * Then the native code:
9073 */
9074 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9075 while (offNative < cNative)
9076 {
9077 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9078# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9079 uint32_t cbInstr = sizeof(paNative[0]);
9080 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9081 if (RT_SUCCESS(rc))
9082 {
9083# if defined(RT_ARCH_AMD64)
9084 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9085 {
9086 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9087 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9088 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9089 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9090 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9091 uInfo & 0x8000 ? "recompiled" : "todo");
9092 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9093 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9094 else
9095 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9096 }
9097 else
9098# endif
9099 {
9100# ifdef RT_ARCH_AMD64
9101 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9102 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9103 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9104 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9105# elif defined(RT_ARCH_ARM64)
9106 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9107 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9108 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9109# else
9110# error "Port me"
9111# endif
9112 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9113 }
9114 }
9115 else
9116 {
9117# if defined(RT_ARCH_AMD64)
9118 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9119 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9120# else
9121 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9122# endif
9123 cbInstr = sizeof(paNative[0]);
9124 }
9125 offNative += cbInstr / sizeof(paNative[0]);
9126
9127# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9128 cs_insn *pInstr;
9129 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9130 (uintptr_t)pNativeCur, 1, &pInstr);
9131 if (cInstrs > 0)
9132 {
9133 Assert(cInstrs == 1);
9134# if defined(RT_ARCH_AMD64)
9135 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9136 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9137# else
9138 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9139 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9140# endif
9141 offNative += pInstr->size / sizeof(*pNativeCur);
9142 cs_free(pInstr, cInstrs);
9143 }
9144 else
9145 {
9146# if defined(RT_ARCH_AMD64)
9147 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9148                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9149# else
9150 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9151# endif
9152 offNative++;
9153 }
9154# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9155 }
9156 }
9157
9158#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9159 /* Cleanup. */
9160 cs_close(&hDisasm);
9161#endif
9162}
9163
9164
9165/**
9166 * Recompiles the given threaded TB into a native one.
9167 *
9168 * In case of failure the translation block will be returned as-is.
9169 *
9170 * @returns pTb.
9171 * @param pVCpu The cross context virtual CPU structure of the calling
9172 * thread.
9173 * @param pTb The threaded translation to recompile to native.
9174 */
9175DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9176{
9177 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9178
9179 /*
9180     * The first time thru, we allocate the recompiler state; the other times
9181     * we just need to reset it before using it again.
9182 */
9183 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9184 if (RT_LIKELY(pReNative))
9185 iemNativeReInit(pReNative, pTb);
9186 else
9187 {
9188 pReNative = iemNativeInit(pVCpu, pTb);
9189 AssertReturn(pReNative, pTb);
9190 }
9191
9192#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9193 /*
9194 * First do liveness analysis. This is done backwards.
9195 */
9196 {
9197 uint32_t idxCall = pTb->Thrd.cCalls;
9198 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9199 { /* likely */ }
9200 else
9201 {
9202 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9203 while (idxCall > cAlloc)
9204 cAlloc *= 2;
9205 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9206 AssertReturn(pvNew, pTb);
9207 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9208 pReNative->cLivenessEntriesAlloc = cAlloc;
9209 }
9210 AssertReturn(idxCall > 0, pTb);
9211 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9212
9213 /* The initial (final) entry. */
9214 idxCall--;
9215 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9216
9217 /* Loop backwards thru the calls and fill in the other entries. */
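        /* Each liveness function takes the state following the call
           (paLivenessEntries[idxCall]) and derives the state preceding it
           (paLivenessEntries[idxCall - 1]); calls without a dedicated liveness
           function get generic exception/call semantics. */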
9218 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9219 while (idxCall > 0)
9220 {
9221 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9222 if (pfnLiveness)
9223 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9224 else
9225 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9226 pCallEntry--;
9227 idxCall--;
9228 }
9229
9230# ifdef VBOX_WITH_STATISTICS
9231        /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9232           to 'clobbered' rather than 'input'. */
9233 /** @todo */
9234# endif
9235 }
9236#endif
9237
9238 /*
9239 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9240 * for aborting if an error happens.
9241 */
9242 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9243#ifdef LOG_ENABLED
9244 uint32_t const cCallsOrg = cCallsLeft;
9245#endif
9246 uint32_t off = 0;
9247 int rc = VINF_SUCCESS;
9248 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9249 {
9250 /*
9251 * Emit prolog code (fixed).
9252 */
9253 off = iemNativeEmitProlog(pReNative, off);
9254
9255 /*
9256 * Convert the calls to native code.
9257 */
9258#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9259 int32_t iGstInstr = -1;
9260#endif
9261#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9262 uint32_t cThreadedCalls = 0;
9263 uint32_t cRecompiledCalls = 0;
9264#endif
9265#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9266 uint32_t idxCurCall = 0;
9267#endif
9268 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9269 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
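        /* fExec starts out as the TB's IEM_F_XXX subset and is updated below
           when a BltIn_CheckMode call changes the execution mode. */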
9270 while (cCallsLeft-- > 0)
9271 {
9272 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9273#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9274 pReNative->idxCurCall = idxCurCall;
9275#endif
9276
9277 /*
9278 * Debug info, assembly markup and statistics.
9279 */
9280#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9281 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9282 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9283#endif
9284#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9285 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9286 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9287 {
9288 if (iGstInstr < (int32_t)pTb->cInstructions)
9289 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9290 else
9291 Assert(iGstInstr == pTb->cInstructions);
9292 iGstInstr = pCallEntry->idxInstr;
9293 }
9294 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9295#endif
9296#if defined(VBOX_STRICT)
9297 off = iemNativeEmitMarker(pReNative, off,
9298 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9299#endif
9300#if defined(VBOX_STRICT)
9301 iemNativeRegAssertSanity(pReNative);
9302#endif
9303#ifdef VBOX_WITH_STATISTICS
9304 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9305#endif
9306
9307 /*
9308 * Actual work.
9309 */
9310 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9311 pfnRecom ? "(recompiled)" : "(todo)"));
9312 if (pfnRecom) /** @todo stats on this. */
9313 {
9314 off = pfnRecom(pReNative, off, pCallEntry);
9315 STAM_REL_STATS({cRecompiledCalls++;});
9316 }
9317 else
9318 {
9319 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9320 STAM_REL_STATS({cThreadedCalls++;});
9321 }
9322 Assert(off <= pReNative->cInstrBufAlloc);
9323 Assert(pReNative->cCondDepth == 0);
9324
9325#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9326 if (LogIs2Enabled())
9327 {
9328 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9329# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9330 static const char s_achState[] = "CUXI";
9331# else
9332 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9333# endif
9334
9335 char szGpr[17];
9336 for (unsigned i = 0; i < 16; i++)
9337 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9338 szGpr[16] = '\0';
9339
9340 char szSegBase[X86_SREG_COUNT + 1];
9341 char szSegLimit[X86_SREG_COUNT + 1];
9342 char szSegAttrib[X86_SREG_COUNT + 1];
9343 char szSegSel[X86_SREG_COUNT + 1];
9344 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9345 {
9346 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9347 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9348 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9349 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9350 }
9351 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9352 = szSegSel[X86_SREG_COUNT] = '\0';
9353
9354 char szEFlags[8];
9355 for (unsigned i = 0; i < 7; i++)
9356 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9357 szEFlags[7] = '\0';
9358
9359 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9360 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9361 }
9362#endif
9363
9364 /*
9365 * Advance.
9366 */
9367 pCallEntry++;
9368#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9369 idxCurCall++;
9370#endif
9371 }
9372
9373 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9374 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9375 if (!cThreadedCalls)
9376 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9377
9378 /*
9379 * Emit the epilog code.
9380 */
9381 uint32_t idxReturnLabel;
9382 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9383
9384 /*
9385 * Generate special jump labels.
9386 */
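        /* Only emit the tail code for label types that were actually requested
           during recompilation, as tracked by the bmLabelTypes bitmap. */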
9387 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9388 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9389 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9390 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9391 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
9392 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
9393 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
9394 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
9395 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
9396 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
9397 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
9398 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
9399 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
9400 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
9401 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
9402 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
9403 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
9404 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
9405 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
9406 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
9407 }
9408 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9409 {
9410 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9411 return pTb;
9412 }
9413 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9414 Assert(off <= pReNative->cInstrBufAlloc);
9415
9416 /*
9417     * Make sure all labels have been defined.
9418 */
9419 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9420#ifdef VBOX_STRICT
9421 uint32_t const cLabels = pReNative->cLabels;
9422 for (uint32_t i = 0; i < cLabels; i++)
9423 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9424#endif
9425
9426 /*
9427 * Allocate executable memory, copy over the code we've generated.
9428 */
9429 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9430 if (pTbAllocator->pDelayedFreeHead)
9431 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9432
9433 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9434 AssertReturn(paFinalInstrBuf, pTb);
9435 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9436
9437 /*
9438 * Apply fixups.
9439 */
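    /* Note: label and fixup offsets are in IEMNATIVEINSTR units (bytes on AMD64
       hosts, 32-bit words on ARM64 hosts), matching what the relative branch
       encodings below expect; offAddend lets the emitter adjust for how the
       target instruction anchors its displacement. */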
9440 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9441 uint32_t const cFixups = pReNative->cFixups;
9442 for (uint32_t i = 0; i < cFixups; i++)
9443 {
9444 Assert(paFixups[i].off < off);
9445 Assert(paFixups[i].idxLabel < cLabels);
9446 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9447 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9448 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9449 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9450 switch (paFixups[i].enmType)
9451 {
9452#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9453 case kIemNativeFixupType_Rel32:
9454 Assert(paFixups[i].off + 4 <= off);
9455 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9456 continue;
9457
9458#elif defined(RT_ARCH_ARM64)
9459 case kIemNativeFixupType_RelImm26At0:
9460 {
9461 Assert(paFixups[i].off < off);
9462 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9463 Assert(offDisp >= -262144 && offDisp < 262144);
9464 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9465 continue;
9466 }
9467
9468 case kIemNativeFixupType_RelImm19At5:
9469 {
9470 Assert(paFixups[i].off < off);
9471 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9472 Assert(offDisp >= -262144 && offDisp < 262144);
9473 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9474 continue;
9475 }
9476
9477 case kIemNativeFixupType_RelImm14At5:
9478 {
9479 Assert(paFixups[i].off < off);
9480 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9481 Assert(offDisp >= -8192 && offDisp < 8192);
9482 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9483 continue;
9484 }
9485
9486#endif
9487 case kIemNativeFixupType_Invalid:
9488 case kIemNativeFixupType_End:
9489 break;
9490 }
9491 AssertFailed();
9492 }
9493
9494 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9495 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9496
9497 /*
9498 * Convert the translation block.
9499 */
9500 RTMemFree(pTb->Thrd.paCalls);
9501 pTb->Native.paInstructions = paFinalInstrBuf;
9502 pTb->Native.cInstructions = off;
9503 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9504#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9505    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9506 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9507#endif
9508
9509 Assert(pTbAllocator->cThreadedTbs > 0);
9510 pTbAllocator->cThreadedTbs -= 1;
9511 pTbAllocator->cNativeTbs += 1;
9512 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9513
9514#ifdef LOG_ENABLED
9515 /*
9516 * Disassemble to the log if enabled.
9517 */
9518 if (LogIs3Enabled())
9519 {
9520 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9521 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9522# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9523 RTLogFlush(NULL);
9524# endif
9525 }
9526#endif
9527 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9528
9529 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9530 return pTb;
9531}
9532