VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@104101

Last change on this file was r104101, checked in by vboxsync, 11 months ago:

VMM/IEM: Get rid of the inefficient looping when flushing dirty guest registers and use ASMBitFirstSetU64() to find the first set bit just like everywhere else, bugref:10614 bugref:10629

1/* $Id: IEMAllN8veRecompiler.cpp 104101 2024-03-28 07:25:23Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
106 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
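/* A worked example of the sub-allocation granularity above (illustrative only):
 * a request is rounded up to whole 128 byte units, so e.g.
 *      cbReq     = 300
 *      cReqUnits = (300 + 128 - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 3
 *      rounded   = 3 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT               = 384 bytes
 * i.e. at most IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1 bytes are wasted per
 * allocation. */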
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Critical section serializing GDB JIT registrations (the __jit_debug_descriptor list). */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity (and laziness), they are allocated as one
339 * continuous chunk. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
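/* Sizing sketch for the alternative sub-allocator bookkeeping (illustrative
 * figures only): with a 64 MB chunk and 128 byte units this comes to
 *      cUnitsPerChunk          = _64M >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 524288
 *      cBitmapElementsPerChunk = 524288 / 64                                 = 8192 uint64_t
 * i.e. 64 KB of allocation bitmap per chunk, which lives in pbmAlloc next to
 * this structure rather than inside the executable chunk itself. */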
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
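/* A worked example of the first-fit scan above (illustrative only): assume
 * idxFirst = 0, cReqUnits = 3, units 0-2 and 4 already allocated (bits set) and
 * everything else free.  Then:
 *      ASMBitFirstClear                  -> iBit = 3
 *      ASMBitTest(4) is set              -> only idxAddBit = 1 free unit, not enough
 *      ASMBitNextClear(pbmAlloc, .., 3)  -> iBit = 5
 *      units 6 and 7 are also free       -> idxAddBit reaches 3, bits 5..7 get set
 * and the caller is handed pvChunk + 5 * IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE. */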
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510 /*
511 * Adjust the request size so it'll fit the allocator alignment/whatnot.
512 *
513 * For the RTHeapSimple allocator this means to follow the logic described
514 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
515 * existing chunks if we think we've got sufficient free memory around.
516 *
517 * While for the alternative one we just align it up to a whole unit size.
518 */
519#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
520 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
521#else
522 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
523#endif
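    /* Worked example of the adjustment (illustrative figures only): for cbReq = 300
       the alternative sub-allocator rounds up to 3 * 128 = 384 bytes, while the
       RTHeapSimple path (assuming the 32 byte block header of a 64-bit host) yields
       RT_ALIGN_32(300 + 32, 64) - 32 = 352 bytes, so that block + header ends
       exactly on a 64 byte boundary. */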
524
525 for (unsigned iIteration = 0;; iIteration++)
526 {
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
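/* The intended usage flow on hosts where RTMEM_PROT_WRITE and RTMEM_PROT_EXEC
 * are mutually exclusive, as a sketch only (the helper below is made up for
 * illustration; the real callers are the TB allocator and native emitter code):
 * allocate, emit while the pages are writable, then mark ready before executing. */
#if 0
static void iemExecMemExampleEmitAndActivate(PVMCPUCC pVCpu, const uint8_t *pbSrc, uint32_t cbCode)
{
    /* 1. Allocate executable memory; on darwin the pages come back read+write only. */
    uint8_t * const pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pbDst)
    {
        /* 2. Emit/copy the native code while the pages are still writable. */
        memcpy(pbDst, pbSrc, cbCode);
        /* 3. Flip the pages to read+exec and invalidate the icache before running the code. */
        iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cbCode);
    }
}
#endif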
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and, mainly, because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
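/* Example encodings produced by the two helpers above (illustrative only):
 *      iemDwarfPutUleb128(Ptr, 300)  ->  0xac 0x02
 *      iemDwarfPutLeb128(Ptr, 1)     ->  0x01
 *      iemDwarfPutLeb128(Ptr, -8)    ->  0x78
 *      iemDwarfPutLeb128(Ptr, 200)   ->  0xc8 0x01
 * matching the standard (S)LEB128 encodings used throughout DWARF. */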
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
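/* Illustrative byte sequences for the AMD64 rules emitted further down, assuming
 * the standard DWARF opcode values (DW_CFA_def_cfa = 0x0c, DW_CFA_offset = 0x80)
 * and DWARF register number 6 for RBP:
 *      iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) -> 0x0c 0x06 0x10   (CFA = RBP + 16)
 *      iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2)  -> 0x86 0x02        (RBP saved at CFA + 2*-8)
 * where the -8 comes from the data alignment factor written into the CIE below. */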
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
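    /* The resulting abEhFrame content is a regular .eh_frame stream (sketch only):
     *      [CIE: length | 0 | version | "" augmentation | alignment factors | RA column | initial CFI rules | padding]
     *      [FDE: length | back-offset to the CIE | chunk start address | chunk size | padding]
     *      [terminator: zero length]
     * i.e. a single CIE and a single FDE covering the whole chunk, so every TB
     * allocated from the chunk shares the same unwind rules. */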
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
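 /* Illustrative only: after the first few APPEND_STR invocations below the string
  * table contains
  *     szzStrTab = "\0.eh_frame\0.shstrtab\0.symtab\0..."   (name offsets 0, 1, 11, 21, ...)
  * and each section header stores the offset of its name in sh_name, as ELF requires. */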
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
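    /* Worked example (illustrative figures only, assuming ASMBitLastSetU32 returns
       the 1-based index of the most significant set bit): for cbMax = 40 MB the
       code above picks cbChunk = 40 MB / 4 = 10 MB, rounds that up to the next
       power of two (16 MB), then rounds cbMax up to a whole number of chunks
       (48 MB), giving cMaxChunks = 3. */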
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
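    /* Everything lives in a single RTMemAllocZ block: the IEMEXECMEMALLOCATOR header with
       its aChunks[cMaxChunks] array, then (with the alternative sub-allocator) the per-chunk
       allocation bitmaps at offBitmaps, and finally (ring-3, non-Windows builds) the unwind
       frames at offEhFrames. */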
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
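    /* Each uint64_t bitmap element tracks 64 allocation units, hence the '+ 6' here, just
       like the '+ 3' in the cbBitmap sizing above accounts for 8 units per byte. */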
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
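/* Illustrative call sequence (sizes chosen for this sketch only): on an EMT one would do
       int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0 (i.e. default chunk size));
   which ends up with a 16 MiB chunk size and room for 4 chunks, further chunks being
   populated via iemExecMemAllocatorGrow. */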
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
1643 *
1644 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
1647{
1648 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
1649 iemRaiseSimdFpExceptionJmp(pVCpu);
1650 else
1651 iemRaiseUndefinedOpcodeJmp(pVCpu);
1652#ifndef _MSC_VER
1653 return VINF_IEM_RAISED_XCPT; /* not reached */
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code when it wants to raise a \#NM.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1662{
1663 iemRaiseDeviceNotAvailableJmp(pVCpu);
1664#ifndef _MSC_VER
1665 return VINF_IEM_RAISED_XCPT; /* not reached */
1666#endif
1667}
1668
1669
1670/**
1671 * Used by TB code when it wants to raise a \#GP(0).
1672 */
1673IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1674{
1675 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1676#ifndef _MSC_VER
1677 return VINF_IEM_RAISED_XCPT; /* not reached */
1678#endif
1679}
1680
1681
1682/**
1683 * Used by TB code when it wants to raise a \#MF.
1684 */
1685IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1686{
1687 iemRaiseMathFaultJmp(pVCpu);
1688#ifndef _MSC_VER
1689 return VINF_IEM_RAISED_XCPT; /* not reached */
1690#endif
1691}
1692
1693
1694/**
1695 * Used by TB code when it wants to raise a \#XF.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1698{
1699 iemRaiseSimdFpExceptionJmp(pVCpu);
1700#ifndef _MSC_VER
1701 return VINF_IEM_RAISED_XCPT; /* not reached */
1702#endif
1703}
1704
1705
1706/**
1707 * Used by TB code when detecting opcode changes.
1708 * @see iemThreadeFuncWorkerObsoleteTb
1709 */
1710IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1711{
1712 /* We set fSafeToFree to false because we're being called in the context
1713 of a TB callback function, which for native TBs means we cannot release
1714 the executable memory until we've returned all the way back to iemTbExec,
1715 as that return path goes via the native code generated for the TB. */
1716 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1717 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1718 return VINF_IEM_REEXEC_BREAK;
1719}
1720
1721
1722/**
1723 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1724 */
1725IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1726{
1727 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1728 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1729 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1730 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1731 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1732 return VINF_IEM_REEXEC_BREAK;
1733}
1734
1735
1736/**
1737 * Used by TB code when we missed a PC check after a branch.
1738 */
1739IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1740{
1741 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1742 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1743 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1744 pVCpu->iem.s.pbInstrBuf));
1745 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1746 return VINF_IEM_REEXEC_BREAK;
1747}
1748
1749
1750
1751/*********************************************************************************************************************************
1752* Helpers: Segmented memory fetches and stores. *
1753*********************************************************************************************************************************/
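/* A note on the pattern below (intent inferred from the configuration names): when the
   relevant IEMNATIVE_WITH_TLB_LOOKUP_* option is defined, the recompiled code does the
   TLB lookup inline and only calls these helpers on a miss, so they forward to the
   *SafeJmp fallbacks; otherwise they forward to the regular *Jmp workers, which perform
   the lookup themselves. */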
1754
1755/**
1756 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1757 */
1758IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1759{
1760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1761 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1762#else
1763 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1764#endif
1765}
1766
1767
1768/**
1769 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1770 * to 16 bits.
1771 */
1772IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1773{
1774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1775 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1776#else
1777 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1778#endif
1779}
1780
1781
1782/**
1783 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1784 * to 32 bits.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1789 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1790#else
1791 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1792#endif
1793}
1794
1795/**
1796 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1797 * to 64 bits.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1802 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1803#else
1804 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1824 * to 32 bits.
1825 */
1826IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1827{
1828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1829 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1830#else
1831 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1832#endif
1833}
1834
1835
1836/**
1837 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1838 * to 64 bits.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1843 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1844#else
1845 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1856 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1857#else
1858 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1865 * to 64 bits.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1870 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1871#else
1872 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1883 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1884#else
1885 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1886#endif
1887}
1888
1889
1890#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1891/**
1892 * Used by TB code to load 128-bit data w/ segmentation.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1897 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1898#else
1899 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to load 128-bit data w/ segmentation, with SSE alignment checking.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1910 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1911#else
1912 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to load 128-bit data w/ segmentation, skipping the alignment check.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1923 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1924#else
1925 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to load 256-bit data w/ segmentation, skipping the alignment check.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1936 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1937#else
1938 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to load 256-bit data w/ segmentation, with AVX alignment checking.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1949 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1950#else
1951 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1952#endif
1953}
1954#endif
1955
1956
1957/**
1958 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1963 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1964#else
1965 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1976 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1977#else
1978 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1989 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1990#else
1991 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2002 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2003#else
2004 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2005#endif
2006}
2007
2008
2009#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2010/**
2011 * Used by TB code to store unsigned 128-bit data w/ segmentation, with SSE alignment checking.
2012 */
2013IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2014{
2015#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2016 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2017#else
2018 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2019#endif
2020}
2021
2022
2023/**
2024 * Used by TB code to store unsigned 128-bit data w/ segmentation, skipping the alignment check.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2029 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2030#else
2031 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to store unsigned 256-bit data w/ segmentation, skipping the alignment check.
2038 */
2039IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2040{
2041#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2042 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2043#else
2044 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2045#endif
2046}
2047
2048
2049/**
2050 * Used by TB code to store unsigned 256-bit data w/ segmentation, with AVX alignment checking.
2051 */
2052IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2053{
2054#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2055 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2056#else
2057 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2058#endif
2059}
2060#endif
2061
2062
2063
2064/**
2065 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2068{
2069#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2070 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2071#else
2072 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2073#endif
2074}
2075
2076
2077/**
2078 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2079 */
2080IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2081{
2082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2083 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2084#else
2085 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2086#endif
2087}
2088
2089
2090/**
2091 * Used by TB code to store an 32-bit selector value onto a generic stack.
2092 * Used by TB code to store a 32-bit selector value onto a generic stack.
2093 *
2094 * Intel CPUs don't write the whole dword, hence the special function.
2095IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2096{
2097#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2098 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2099#else
2100 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2101#endif
2102}
2103
2104
2105/**
2106 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2107 */
2108IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2109{
2110#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2111 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2112#else
2113 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2114#endif
2115}
2116
2117
2118/**
2119 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2122{
2123#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2124 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2125#else
2126 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2127#endif
2128}
2129
2130
2131/**
2132 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2137 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2138#else
2139 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2150 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2151#else
2152 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2153#endif
2154}
2155
2156
2157
2158/*********************************************************************************************************************************
2159* Helpers: Flat memory fetches and stores. *
2160*********************************************************************************************************************************/
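/* Same pattern as above; note that UINT8_MAX is passed as the segment register index to
   the *SafeJmp workers to indicate a flat (already linearized) address. */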
2161
2162/**
2163 * Used by TB code to load unsigned 8-bit data w/ flat address.
2164 * @note Zero extending the value to 64-bit to simplify assembly.
2165 */
2166IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2167{
2168#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2169 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2170#else
2171 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2172#endif
2173}
2174
2175
2176/**
2177 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2178 * to 16 bits.
2179 * @note Zero extending the value to 64-bit to simplify assembly.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2182{
2183#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2184 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2185#else
2186 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2187#endif
2188}
2189
2190
2191/**
2192 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2193 * to 32 bits.
2194 * @note Zero extending the value to 64-bit to simplify assembly.
2195 */
2196IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2197{
2198#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2199 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2200#else
2201 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2202#endif
2203}
2204
2205
2206/**
2207 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2208 * to 64 bits.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2213 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2214#else
2215 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2216#endif
2217}
2218
2219
2220/**
2221 * Used by TB code to load unsigned 16-bit data w/ flat address.
2222 * @note Zero extending the value to 64-bit to simplify assembly.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2227 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2228#else
2229 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2236 * to 32 bits.
2237 * @note Zero extending the value to 64-bit to simplify assembly.
2238 */
2239IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2240{
2241#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2242 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2243#else
2244 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2245#endif
2246}
2247
2248
2249/**
2250 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2251 * to 64 bits.
2252 * @note Zero extending the value to 64-bit to simplify assembly.
2253 */
2254IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2255{
2256#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2257 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2258#else
2259 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2260#endif
2261}
2262
2263
2264/**
2265 * Used by TB code to load unsigned 32-bit data w/ flat address.
2266 * @note Zero extending the value to 64-bit to simplify assembly.
2267 */
2268IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2269{
2270#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2271 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2272#else
2273 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2274#endif
2275}
2276
2277
2278/**
2279 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2280 * to 64 bits.
2281 * @note Zero extending the value to 64-bit to simplify assembly.
2282 */
2283IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2284{
2285#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2286 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2287#else
2288 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2289#endif
2290}
2291
2292
2293/**
2294 * Used by TB code to load unsigned 64-bit data w/ flat address.
2295 */
2296IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2297{
2298#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2299 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2300#else
2301 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2302#endif
2303}
2304
2305
2306#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2307/**
2308 * Used by TB code to load unsigned 128-bit data w/ flat address.
2309 */
2310IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2311{
2312#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2313 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2314#else
2315 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2316#endif
2317}
2318
2319
2320/**
2321 * Used by TB code to load unsigned 128-bit data w/ flat address, with SSE alignment checking.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2324{
2325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2326 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2327#else
2328 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2329#endif
2330}
2331
2332
2333/**
2334 * Used by TB code to load unsigned 128-bit data w/ flat address, skipping the alignment check.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2337{
2338#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2339 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2340#else
2341 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2342#endif
2343}
2344
2345
2346/**
2347 * Used by TB code to load unsigned 256-bit data w/ flat address, skipping the alignment check.
2348 */
2349IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2350{
2351#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2352 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2353#else
2354 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2355#endif
2356}
2357
2358
2359/**
2360 * Used by TB code to load unsigned 256-bit data w/ flat address, with AVX alignment checking.
2361 */
2362IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2365 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2366#else
2367 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2368#endif
2369}
2370#endif
2371
2372
2373/**
2374 * Used by TB code to store unsigned 8-bit data w/ flat address.
2375 */
2376IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2379 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2380#else
2381 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to store unsigned 16-bit data w/ flat address.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2390{
2391#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2392 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2393#else
2394 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2395#endif
2396}
2397
2398
2399/**
2400 * Used by TB code to store unsigned 32-bit data w/ flat address.
2401 */
2402IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2403{
2404#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2405 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2406#else
2407 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2408#endif
2409}
2410
2411
2412/**
2413 * Used by TB code to store unsigned 64-bit data w/ flat address.
2414 */
2415IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2416{
2417#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2418 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2419#else
2420 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2421#endif
2422}
2423
2424
2425#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2426/**
2427 * Used by TB code to store unsigned 128-bit data w/ flat address, with SSE alignment checking.
2428 */
2429IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2432 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2433#else
2434 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to store unsigned 128-bit data w/ flat address, skipping the alignment check.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2445 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2446#else
2447 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to store unsigned 256-bit data w/ flat address, skipping the alignment check.
2454 */
2455IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2458 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2459#else
2460 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to store unsigned 256-bit data w/ flat address, with AVX alignment checking.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2469{
2470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2471 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2472#else
2473 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2474#endif
2475}
2476#endif
2477
2478
2479
2480/**
2481 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2482 */
2483IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2484{
2485#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2486 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2487#else
2488 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2489#endif
2490}
2491
2492
2493/**
2494 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2495 */
2496IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2497{
2498#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2499 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2500#else
2501 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2502#endif
2503}
2504
2505
2506/**
2507 * Used by TB code to store a segment selector value onto a flat stack.
2508 *
2509 * Intel CPUs don't write the whole dword, hence the special function.
2510 */
2511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2512{
2513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2514 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2515#else
2516 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2517#endif
2518}
2519
2520
2521/**
2522 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2523 */
2524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2525{
2526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2527 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2528#else
2529 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2530#endif
2531}
2532
2533
2534/**
2535 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2536 */
2537IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2538{
2539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2540 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2541#else
2542 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2543#endif
2544}
2545
2546
2547/**
2548 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2553 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2554#else
2555 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2566 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2567#else
2568 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2569#endif
2570}
2571
2572
2573
2574/*********************************************************************************************************************************
2575* Helpers: Segmented memory mapping. *
2576*********************************************************************************************************************************/
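/* The map helpers return a host pointer into the guest mapping and fill in *pbUnmapInfo,
   which the TB code later hands to the commit-and-unmap helpers further down to release
   the mapping. */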
2577
2578/**
2579 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2580 * segmentation.
2581 */
2582IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2583 RTGCPTR GCPtrMem, uint8_t iSegReg))
2584{
2585#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2586 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2587#else
2588 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2589#endif
2590}
2591
2592
2593/**
2594 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2595 */
2596IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2597 RTGCPTR GCPtrMem, uint8_t iSegReg))
2598{
2599#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2600 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2601#else
2602 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2603#endif
2604}
2605
2606
2607/**
2608 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2609 */
2610IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2611 RTGCPTR GCPtrMem, uint8_t iSegReg))
2612{
2613#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2614 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2615#else
2616 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2617#endif
2618}
2619
2620
2621/**
2622 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2623 */
2624IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2625 RTGCPTR GCPtrMem, uint8_t iSegReg))
2626{
2627#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2628 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2629#else
2630 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2631#endif
2632}
2633
2634
2635/**
2636 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2637 * segmentation.
2638 */
2639IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2640 RTGCPTR GCPtrMem, uint8_t iSegReg))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2644#else
2645 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2654 RTGCPTR GCPtrMem, uint8_t iSegReg))
2655{
2656#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2657 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2658#else
2659 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2660#endif
2661}
2662
2663
2664/**
2665 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2666 */
2667IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2668 RTGCPTR GCPtrMem, uint8_t iSegReg))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2672#else
2673 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2682 RTGCPTR GCPtrMem, uint8_t iSegReg))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2686#else
2687 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2694 * segmentation.
2695 */
2696IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2697 RTGCPTR GCPtrMem, uint8_t iSegReg))
2698{
2699#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2700 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2701#else
2702 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2703#endif
2704}
2705
2706
2707/**
2708 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2709 */
2710IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2711 RTGCPTR GCPtrMem, uint8_t iSegReg))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2715#else
2716 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2725 RTGCPTR GCPtrMem, uint8_t iSegReg))
2726{
2727#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2728 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2729#else
2730 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2731#endif
2732}
2733
2734
2735/**
2736 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2737 */
2738IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2739 RTGCPTR GCPtrMem, uint8_t iSegReg))
2740{
2741#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2742 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2743#else
2744 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2745#endif
2746}
2747
2748
2749/**
2750 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2751 * segmentation.
2752 */
2753IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2754 RTGCPTR GCPtrMem, uint8_t iSegReg))
2755{
2756#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2757 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2758#else
2759 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2760#endif
2761}
2762
2763
2764/**
2765 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2766 */
2767IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2768 RTGCPTR GCPtrMem, uint8_t iSegReg))
2769{
2770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2771 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2772#else
2773 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2774#endif
2775}
2776
2777
2778/**
2779 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2780 */
2781IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2782 RTGCPTR GCPtrMem, uint8_t iSegReg))
2783{
2784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2785 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2786#else
2787 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2788#endif
2789}
2790
2791
2792/**
2793 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2794 */
2795IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2796 RTGCPTR GCPtrMem, uint8_t iSegReg))
2797{
2798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2799 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2800#else
2801 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2802#endif
2803}
2804
2805
2806/**
2807 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2808 */
2809IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2810 RTGCPTR GCPtrMem, uint8_t iSegReg))
2811{
2812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2813 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2814#else
2815 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2816#endif
2817}
2818
2819
2820/**
2821 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2822 */
2823IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2824 RTGCPTR GCPtrMem, uint8_t iSegReg))
2825{
2826#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2827 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2828#else
2829 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2830#endif
2831}
2832
2833
2834/**
2835 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2836 * segmentation.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2839 RTGCPTR GCPtrMem, uint8_t iSegReg))
2840{
2841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2842 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2843#else
2844 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2845#endif
2846}
2847
2848
2849/**
2850 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2851 */
2852IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2853 RTGCPTR GCPtrMem, uint8_t iSegReg))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2857#else
2858 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2867 RTGCPTR GCPtrMem, uint8_t iSegReg))
2868{
2869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2870 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2871#else
2872 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2873#endif
2874}
2875
2876
2877/**
2878 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2879 */
2880IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2881 RTGCPTR GCPtrMem, uint8_t iSegReg))
2882{
2883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2884 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2885#else
2886 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2887#endif
2888}
2889
2890
2891/*********************************************************************************************************************************
2892* Helpers: Flat memory mapping. *
2893*********************************************************************************************************************************/
2894
2895/**
2896 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2897 * address.
2898 */
2899IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2900{
2901#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2902 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2903#else
2904 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2905#endif
2906}
2907
2908
2909/**
2910 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2911 */
2912IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2913{
2914#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2915 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2916#else
2917 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2918#endif
2919}
2920
2921
2922/**
2923 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2924 */
2925IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2926{
2927#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2928 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2929#else
2930 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2931#endif
2932}
2933
2934
2935/**
2936 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2937 */
2938IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2939{
2940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2941 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2942#else
2943 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2944#endif
2945}
2946
2947
2948/**
2949 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2950 * address.
2951 */
2952IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2953{
2954#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2955 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2956#else
2957 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2958#endif
2959}
2960
2961
2962/**
2963 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2964 */
2965IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2966{
2967#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2968 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2969#else
2970 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2971#endif
2972}
2973
2974
2975/**
2976 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2977 */
2978IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2979{
2980#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2981 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2982#else
2983 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2984#endif
2985}
2986
2987
2988/**
2989 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2990 */
2991IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2992{
2993#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2994 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2995#else
2996 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2997#endif
2998}
2999
3000
3001/**
3002 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
3003 * address.
3004 */
3005IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3006{
3007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3008 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3009#else
3010 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3011#endif
3012}
3013
3014
3015/**
3016 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
3017 */
3018IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3019{
3020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3021 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3022#else
3023 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3024#endif
3025}
3026
3027
3028/**
3029 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
3030 */
3031IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3032{
3033#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3034 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3035#else
3036 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3037#endif
3038}
3039
3040
3041/**
3042 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
3043 */
3044IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3045{
3046#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3047 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3048#else
3049 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3050#endif
3051}
3052
3053
3054/**
3055 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
3056 * address.
3057 */
3058IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3059{
3060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3061 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3062#else
3063 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3064#endif
3065}
3066
3067
3068/**
3069 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3070 */
3071IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3072{
3073#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3074 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3075#else
3076 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3077#endif
3078}
3079
3080
3081/**
3082 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3083 */
3084IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3085{
3086#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3087 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3088#else
3089 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3090#endif
3091}
3092
3093
3094/**
3095 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3096 */
3097IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3098{
3099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3100 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3101#else
3102 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3103#endif
3104}
3105
3106
3107/**
3108 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3109 */
3110IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3111{
3112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3113 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3114#else
3115 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3116#endif
3117}
3118
3119
3120/**
3121 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3122 */
3123IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3124{
3125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3126 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3127#else
3128 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3129#endif
3130}
3131
3132
3133/**
3134 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3135 * address.
3136 */
3137IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3138{
3139#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3140 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3141#else
3142 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3143#endif
3144}
3145
3146
3147/**
3148 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3149 */
3150IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3151{
3152#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3153 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3154#else
3155 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3156#endif
3157}
3158
3159
3160/**
3161 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3162 */
3163IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3164{
3165#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3166 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3167#else
3168 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3169#endif
3170}
3171
3172
3173/**
3174 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3175 */
3176IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3177{
3178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3179 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3180#else
3181 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3182#endif
3183}
3184
3185
3186/*********************************************************************************************************************************
3187* Helpers: Commit, rollback & unmap *
3188*********************************************************************************************************************************/
3189
3190/**
3191 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3192 */
3193IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3194{
3195 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3196}
3197
3198
3199/**
3200 * Used by TB code to commit and unmap a read-write memory mapping.
3201 */
3202IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3203{
3204 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3205}
3206
3207
3208/**
3209 * Used by TB code to commit and unmap a write-only memory mapping.
3210 */
3211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3212{
3213 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3214}
3215
3216
3217/**
3218 * Used by TB code to commit and unmap a read-only memory mapping.
3219 */
3220IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3221{
3222 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3223}
3224
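/*
 * Illustrative sketch (not part of the build): TB code pairs one of the
 * flat-map helpers with the matching commit helper above, conceptually:
 *
 *      uint8_t   bUnmapInfo;
 *      uint32_t *pu32 = iemNativeHlpMemFlatMapDataU32Wo(pVCpu, &bUnmapInfo, GCPtrMem);
 *      *pu32 = uValue;
 *      iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);
 *
 * The recompiler emits these calls as native code; the point is only that the
 * bUnmapInfo cookie filled in by the map helper is what the commit helper takes.
 */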
3225
3226/**
3227 * Reinitializes the native recompiler state.
3228 *
3229 * Called before starting a new recompile job.
3230 */
3231static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3232{
3233 pReNative->cLabels = 0;
3234 pReNative->bmLabelTypes = 0;
3235 pReNative->cFixups = 0;
3236#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3237 pReNative->pDbgInfo->cEntries = 0;
3238 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
3239#endif
3240 pReNative->pTbOrg = pTb;
3241 pReNative->cCondDepth = 0;
3242 pReNative->uCondSeqNo = 0;
3243 pReNative->uCheckIrqSeqNo = 0;
3244 pReNative->uTlbSeqNo = 0;
3245
3246#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3247 pReNative->Core.offPc = 0;
3248 pReNative->Core.cInstrPcUpdateSkipped = 0;
3249#endif
3250#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3251 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3252#endif
3253 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3254#if IEMNATIVE_HST_GREG_COUNT < 32
3255 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3256#endif
3257 ;
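 /* Note: the ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U) term above permanently
    marks the non-existent host registers as allocated; e.g. with a register
    count of 16 it evaluates to 0xffff0000, so the allocator never hands out
    bits 16..31.  (Example value only; the count is architecture dependent.) */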
3258 pReNative->Core.bmHstRegsWithGstShadow = 0;
3259 pReNative->Core.bmGstRegShadows = 0;
3260#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3261 pReNative->Core.bmGstRegShadowDirty = 0;
3262#endif
3263 pReNative->Core.bmVars = 0;
3264 pReNative->Core.bmStack = 0;
3265 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3266 pReNative->Core.u64ArgVars = UINT64_MAX;
3267
3268 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
3269 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3270 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3271 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3272 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3273 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3274 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3275 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3276 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3277 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3278 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3279 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3280 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3281 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3282 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3283 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3284 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3285 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
3286
3287 /* Full host register reinit: */
3288 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3289 {
3290 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3291 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3292 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3293 }
3294
3295 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3296 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3297#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3298 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3299#endif
3300#ifdef IEMNATIVE_REG_FIXED_TMP0
3301 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3302#endif
3303#ifdef IEMNATIVE_REG_FIXED_TMP1
3304 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3305#endif
3306#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3307 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3308#endif
3309 );
3310 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3311 {
3312 fRegs &= ~RT_BIT_32(idxReg);
3313 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3314 }
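 /* The loop above walks the set bits of fRegs: ASMBitFirstSetU32 returns the
    1-based index of the lowest set bit (hence the -1), which is then cleared
    before that fixed register is marked as reserved. */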
3315
3316 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3317#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3318 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3319#endif
3320#ifdef IEMNATIVE_REG_FIXED_TMP0
3321 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3322#endif
3323#ifdef IEMNATIVE_REG_FIXED_TMP1
3324 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3325#endif
3326#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3327 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3328#endif
3329
3330#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3331 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3332# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3333 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3334# endif
3335 ;
3336 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3337 pReNative->Core.bmGstSimdRegShadows = 0;
3338 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3339 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3340
3341 /* Full host register reinit: */
3342 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3343 {
3344 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3345 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3346 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3347 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3348 }
3349
3350 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
3351 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3352 {
3353 fRegs &= ~RT_BIT_32(idxReg);
3354 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3355 }
3356
3357#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3358 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3359#endif
3360
3361#endif
3362
3363 return pReNative;
3364}
3365
3366
3367/**
3368 * Allocates and initializes the native recompiler state.
3369 *
3370 * This is called the first time an EMT wants to recompile something.
3371 *
3372 * @returns Pointer to the new recompiler state.
3373 * @param pVCpu The cross context virtual CPU structure of the calling
3374 * thread.
3375 * @param pTb The TB that's about to be recompiled.
3376 * @thread EMT(pVCpu)
3377 */
3378static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3379{
3380 VMCPU_ASSERT_EMT(pVCpu);
3381
3382 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3383 AssertReturn(pReNative, NULL);
3384
3385 /*
3386 * Try allocate all the buffers and stuff we need.
3387 */
3388 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3389 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3390 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3391#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3392 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3393#endif
3394 if (RT_LIKELY( pReNative->pInstrBuf
3395 && pReNative->paLabels
3396 && pReNative->paFixups)
3397#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3398 && pReNative->pDbgInfo
3399#endif
3400 )
3401 {
3402 /*
3403 * Set the buffer & array sizes on success.
3404 */
3405 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3406 pReNative->cLabelsAlloc = _8K;
3407 pReNative->cFixupsAlloc = _16K;
3408#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3409 pReNative->cDbgInfoAlloc = _16K;
3410#endif
3411
3412 /* Other constant stuff: */
3413 pReNative->pVCpu = pVCpu;
3414
3415 /*
3416 * Done, just need to save it and reinit it.
3417 */
3418 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3419 return iemNativeReInit(pReNative, pTb);
3420 }
3421
3422 /*
3423 * Failed. Cleanup and return.
3424 */
3425 AssertFailed();
3426 RTMemFree(pReNative->pInstrBuf);
3427 RTMemFree(pReNative->paLabels);
3428 RTMemFree(pReNative->paFixups);
3429#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3430 RTMemFree(pReNative->pDbgInfo);
3431#endif
3432 RTMemFree(pReNative);
3433 return NULL;
3434}
3435
3436
3437/**
3438 * Creates a label
3439 *
3440 * If the label does not yet have a defined position,
3441 * call iemNativeLabelDefine() later to set it.
3442 *
3443 * @returns Label ID. Throws VBox status code on failure, so no need to check
3444 * the return value.
3445 * @param pReNative The native recompile state.
3446 * @param enmType The label type.
3447 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3448 * label is not yet defined (default).
3449 * @param uData Data associated with the label. Only applicable to
3450 * certain types of labels. Default is zero.
3451 */
3452DECL_HIDDEN_THROW(uint32_t)
3453iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3454 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3455{
3456 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3457
3458 /*
3459 * Locate existing label definition.
3460 *
3461 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3462 * and uData is zero.
3463 */
3464 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3465 uint32_t const cLabels = pReNative->cLabels;
3466 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3467#ifndef VBOX_STRICT
3468 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3469 && offWhere == UINT32_MAX
3470 && uData == 0
3471#endif
3472 )
3473 {
3474#ifndef VBOX_STRICT
3475 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3476 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3477 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3478 if (idxLabel < pReNative->cLabels)
3479 return idxLabel;
3480#else
3481 for (uint32_t i = 0; i < cLabels; i++)
3482 if ( paLabels[i].enmType == enmType
3483 && paLabels[i].uData == uData)
3484 {
3485 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3486 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3487 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3488 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3489 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3490 return i;
3491 }
3492 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3493 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3494#endif
3495 }
3496
3497 /*
3498 * Make sure we've got room for another label.
3499 */
3500 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3501 { /* likely */ }
3502 else
3503 {
3504 uint32_t cNew = pReNative->cLabelsAlloc;
3505 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3506 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3507 cNew *= 2;
3508 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3509 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3510 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3511 pReNative->paLabels = paLabels;
3512 pReNative->cLabelsAlloc = cNew;
3513 }
3514
3515 /*
3516 * Define a new label.
3517 */
3518 paLabels[cLabels].off = offWhere;
3519 paLabels[cLabels].enmType = enmType;
3520 paLabels[cLabels].uData = uData;
3521 pReNative->cLabels = cLabels + 1;
3522
3523 Assert((unsigned)enmType < 64);
3524 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3525
3526 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3527 {
3528 Assert(uData == 0);
3529 pReNative->aidxUniqueLabels[enmType] = cLabels;
3530 }
3531
3532 if (offWhere != UINT32_MAX)
3533 {
3534#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3535 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3536 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3537#endif
3538 }
3539 return cLabels;
3540}
3541
3542
3543/**
3544 * Defines the location of an existing label.
3545 *
3546 * @param pReNative The native recompile state.
3547 * @param idxLabel The label to define.
3548 * @param offWhere The position.
3549 */
3550DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3551{
3552 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3553 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3554 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3555 pLabel->off = offWhere;
3556#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3557 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3558 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3559#endif
3560}
3561
3562
3563/**
3564 * Looks up a label.
3565 *
3566 * @returns Label ID if found, UINT32_MAX if not.
3567 */
3568static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3569 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3570{
3571 Assert((unsigned)enmType < 64);
3572 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3573 {
3574 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3575 return pReNative->aidxUniqueLabels[enmType];
3576
3577 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3578 uint32_t const cLabels = pReNative->cLabels;
3579 for (uint32_t i = 0; i < cLabels; i++)
3580 if ( paLabels[i].enmType == enmType
3581 && paLabels[i].uData == uData
3582 && ( paLabels[i].off == offWhere
3583 || offWhere == UINT32_MAX
3584 || paLabels[i].off == UINT32_MAX))
3585 return i;
3586 }
3587 return UINT32_MAX;
3588}
3589
3590
3591/**
3592 * Adds a fixup.
3593 *
3594 * @throws VBox status code (int) on failure.
3595 * @param pReNative The native recompile state.
3596 * @param offWhere The instruction offset of the fixup location.
3597 * @param idxLabel The target label ID for the fixup.
3598 * @param enmType The fixup type.
3599 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3600 */
3601DECL_HIDDEN_THROW(void)
3602iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3603 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3604{
3605 Assert(idxLabel <= UINT16_MAX);
3606 Assert((unsigned)enmType <= UINT8_MAX);
3607#ifdef RT_ARCH_ARM64
3608 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3609 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3610 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3611#endif
3612
3613 /*
3614 * Make sure we've got room.
3615 */
3616 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3617 uint32_t const cFixups = pReNative->cFixups;
3618 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3619 { /* likely */ }
3620 else
3621 {
3622 uint32_t cNew = pReNative->cFixupsAlloc;
3623 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3624 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3625 cNew *= 2;
3626 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3627 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3628 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3629 pReNative->paFixups = paFixups;
3630 pReNative->cFixupsAlloc = cNew;
3631 }
3632
3633 /*
3634 * Add the fixup.
3635 */
3636 paFixups[cFixups].off = offWhere;
3637 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3638 paFixups[cFixups].enmType = enmType;
3639 paFixups[cFixups].offAddend = offAddend;
3640 pReNative->cFixups = cFixups + 1;
3641}
3642
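/*
 * Usage sketch (illustration only; the label and fixup type names are elided):
 * a forward branch is typically emitted by creating an undefined label,
 * recording a fixup at the branch instruction, and defining the label once the
 * target offset is known:
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      ... emit the branch instruction at offBranch ...
 *      iemNativeAddFixup(pReNative, offBranch, idxLabel, enmFixupType);
 *      ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 *
 * The recorded fixup locations are patched with the resolved label offsets when
 * the translation block is finalized.
 */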
3643
3644/**
3645 * Slow code path for iemNativeInstrBufEnsure.
3646 */
3647DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3648{
3649 /* Double the buffer size till we meet the request. */
3650 uint32_t cNew = pReNative->cInstrBufAlloc;
3651 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3652 do
3653 cNew *= 2;
3654 while (cNew < off + cInstrReq);
3655
3656 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3657#ifdef RT_ARCH_ARM64
3658 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3659#else
3660 uint32_t const cbMaxInstrBuf = _2M;
3661#endif
3662 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3663
3664 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3665 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3666
3667#ifdef VBOX_STRICT
3668 pReNative->offInstrBufChecked = off + cInstrReq;
3669#endif
3670 pReNative->cInstrBufAlloc = cNew;
3671 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3672}
3673
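/*
 * For reference (sketch, not part of the build): emitters call the inlined
 * iemNativeInstrBufEnsure(pReNative, off, cInstrReq) wrapper, which only drops
 * into the slow path above when the buffer is too small, roughly:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
 *      pCodeBuf[off++] = ...;   // write one or more native instructions
 *
 * The exact wrapper shape lives in the recompiler headers; this only shows the
 * intended calling pattern.
 */
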
3674#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3675
3676/**
3677 * Grows the static debug info array used during recompilation.
3678 *
3679 * @returns Pointer to the new debug info block; throws VBox status code on
3680 * failure, so no need to check the return value.
3681 */
3682DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3683{
3684 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3685 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3686 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3687 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3688 pReNative->pDbgInfo = pDbgInfo;
3689 pReNative->cDbgInfoAlloc = cNew;
3690 return pDbgInfo;
3691}
3692
3693
3694/**
3695 * Adds a new uninitialized debug info entry, returning the pointer to it.
3696 */
3697DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3698{
3699 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3700 { /* likely */ }
3701 else
3702 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3703 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3704}
3705
3706
3707/**
3708 * Debug Info: Adds a native offset record, if necessary.
3709 */
3710DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3711{
3712 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3713
3714 /*
3715 * Do we need this one?
3716 */
3717 uint32_t const offPrev = pDbgInfo->offNativeLast;
3718 if (offPrev == off)
3719 return;
3720 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3721
3722 /*
3723 * Add it.
3724 */
3725 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3726 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3727 pEntry->NativeOffset.offNative = off;
3728 pDbgInfo->offNativeLast = off;
3729}
3730
3731
3732/**
3733 * Debug Info: Record info about a label.
3734 */
3735static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3736{
3737 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3738 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3739 pEntry->Label.uUnused = 0;
3740 pEntry->Label.enmLabel = (uint8_t)enmType;
3741 pEntry->Label.uData = uData;
3742}
3743
3744
3745/**
3746 * Debug Info: Record info about a threaded call.
3747 */
3748static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3749{
3750 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3751 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3752 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3753 pEntry->ThreadedCall.uUnused = 0;
3754 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3755}
3756
3757
3758/**
3759 * Debug Info: Record info about a new guest instruction.
3760 */
3761static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3762{
3763 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3764 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3765 pEntry->GuestInstruction.uUnused = 0;
3766 pEntry->GuestInstruction.fExec = fExec;
3767}
3768
3769
3770/**
3771 * Debug Info: Record info about guest register shadowing.
3772 */
3773DECL_HIDDEN_THROW(void)
3774iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3775 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3776{
3777 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3778 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3779 pEntry->GuestRegShadowing.uUnused = 0;
3780 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3781 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3782 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3783#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3784 Assert( idxHstReg != UINT8_MAX
3785 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
3786#endif
3787}
3788
3789
3790# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3791/**
3792 * Debug Info: Record info about guest SIMD register shadowing.
3793 */
3794DECL_HIDDEN_THROW(void)
3795iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3796 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3797{
3798 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3799 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3800 pEntry->GuestSimdRegShadowing.uUnused = 0;
3801 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3802 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3803 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3804}
3805# endif
3806
3807
3808# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3809/**
3810 * Debug Info: Record info about delayed RIP updates.
3811 */
3812DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3813{
3814 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3815 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3816 pEntry->DelayedPcUpdate.offPc = offPc;
3817 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3818}
3819# endif
3820
3821# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
3822
3823/**
3824 * Debug Info: Record info about a dirty guest register.
3825 */
3826DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
3827 uint8_t idxGstReg, uint8_t idxHstReg)
3828{
3829 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3830 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
3831 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
3832 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
3833 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
3834}
3835
3836
3837/**
3838 * Debug Info: Record info about a dirty guest register writeback operation.
3839 */
3840DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
3841{
3842 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3843 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
3844 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
3845 pEntry->GuestRegWriteback.fGstReg = (uint32_t)fGstReg;
3846 /** @todo r=aeichner Can't fit the whole register mask in the debug info entry, deal with it when it becomes necessary. */
3847 Assert((uint64_t)pEntry->GuestRegWriteback.fGstReg == fGstReg);
3848}
3849
3850# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
3851
3852#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3853
3854
3855/*********************************************************************************************************************************
3856* Register Allocator *
3857*********************************************************************************************************************************/
3858
3859/**
3860 * Register parameter indexes (indexed by argument number).
3861 */
3862DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3863{
3864 IEMNATIVE_CALL_ARG0_GREG,
3865 IEMNATIVE_CALL_ARG1_GREG,
3866 IEMNATIVE_CALL_ARG2_GREG,
3867 IEMNATIVE_CALL_ARG3_GREG,
3868#if defined(IEMNATIVE_CALL_ARG4_GREG)
3869 IEMNATIVE_CALL_ARG4_GREG,
3870# if defined(IEMNATIVE_CALL_ARG5_GREG)
3871 IEMNATIVE_CALL_ARG5_GREG,
3872# if defined(IEMNATIVE_CALL_ARG6_GREG)
3873 IEMNATIVE_CALL_ARG6_GREG,
3874# if defined(IEMNATIVE_CALL_ARG7_GREG)
3875 IEMNATIVE_CALL_ARG7_GREG,
3876# endif
3877# endif
3878# endif
3879#endif
3880};
3881AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3882
3883/**
3884 * Call register masks indexed by argument count.
3885 */
3886DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3887{
3888 0,
3889 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3890 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3891 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3892 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3893 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3894#if defined(IEMNATIVE_CALL_ARG4_GREG)
3895 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3896 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3897# if defined(IEMNATIVE_CALL_ARG5_GREG)
3898 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3899 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3900# if defined(IEMNATIVE_CALL_ARG6_GREG)
3901 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3902 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3903 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3904# if defined(IEMNATIVE_CALL_ARG7_GREG)
3905 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3906 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3907 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3908# endif
3909# endif
3910# endif
3911#endif
3912};
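/* Example (illustrative): g_afIemNativeCallRegs[3] is the mask covering the
   registers that carry the first three call arguments, which is handy when a
   caller needs to flush or avoid exactly those registers around a helper call. */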
3913
3914#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3915/**
3916 * BP offset of the stack argument slots.
3917 *
3918 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3919 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3920 */
3921DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3922{
3923 IEMNATIVE_FP_OFF_STACK_ARG0,
3924# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3925 IEMNATIVE_FP_OFF_STACK_ARG1,
3926# endif
3927# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3928 IEMNATIVE_FP_OFF_STACK_ARG2,
3929# endif
3930# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3931 IEMNATIVE_FP_OFF_STACK_ARG3,
3932# endif
3933};
3934AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3935#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3936
3937/**
3938 * Info about shadowed guest register values.
3939 * @see IEMNATIVEGSTREG
3940 */
3941DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3942{
3943#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3944 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3945 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3946 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3947 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3948 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3949 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3950 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3951 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3952 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3953 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3954 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3955 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3956 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3957 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3958 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3959 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3960 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3961 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3962 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3963 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3964 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3965 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3966 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3967 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3968 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3969 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3970 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3971 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3972 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3973 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3974 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3975 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3976 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3977 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3978 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3979 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3980 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3981 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3982 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3983 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3984 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3985 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3986 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3987 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3988 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3989 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3990 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3991 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3992#undef CPUMCTX_OFF_AND_SIZE
3993};
3994AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3995
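/* Example (illustrative): g_aGstShadowInfo[kIemNativeGstReg_Pc].off is the byte
   offset of cpum.GstCtx.rip within VMCPU and .cb its size, which is what the
   store/load emitters below use when flushing or loading a shadowed guest
   register. */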
3996
3997/** Host CPU general purpose register names. */
3998DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3999{
4000#ifdef RT_ARCH_AMD64
4001 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
4002#elif defined(RT_ARCH_ARM64)
4003 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
4004 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
4005#else
4006# error "port me"
4007#endif
4008};
4009
4010
4011#if 0 /* unused */
4012/**
4013 * Tries to locate a suitable register in the given register mask.
4014 *
4015 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4016 * failed.
4017 *
4018 * @returns Host register number on success, returns UINT8_MAX on failure.
4019 */
4020static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
4021{
4022 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4023 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4024 if (fRegs)
4025 {
4026 /** @todo pick better here: */
4027 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
4028
4029 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4030 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4031 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4032 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4033
4034 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4035 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4036 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4037 return idxReg;
4038 }
4039 return UINT8_MAX;
4040}
4041#endif /* unused */
4042
4043
4044#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4045/**
4046 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
4047 *
4048 * @returns New code buffer offset; throws VBox status code on failure.
4049 * @param pReNative The native recompile state.
4050 * @param off The current code buffer position.
4051 * @param enmGstReg The guest register to store to.
4052 * @param idxHstReg The host register to store from.
4053 */
4054DECL_FORCE_INLINE_THROW(uint32_t)
4055iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
4056{
4057 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4058 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4059
4060 switch (g_aGstShadowInfo[enmGstReg].cb)
4061 {
4062 case sizeof(uint64_t):
4063 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4064 case sizeof(uint32_t):
4065 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4066 case sizeof(uint16_t):
4067 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4068#if 0 /* not present in the table. */
4069 case sizeof(uint8_t):
4070 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4071#endif
4072 default:
4073 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4074 }
4075}
4076
4077
4078/**
4079 * Emits code to flush a pending write of the given guest register if any.
4080 *
4081 * @returns New code buffer offset.
4082 * @param pReNative The native recompile state.
4083 * @param off Current code buffer position.
4084 * @param enmGstReg The guest register to flush.
4085 */
4086DECL_HIDDEN_THROW(uint32_t)
4087iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
4088{
4089 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4090
4091 Assert(enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast);
4092 Assert( idxHstReg != UINT8_MAX
4093 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
4094 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
4095 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
4096
4097 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
4098
4099 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
4100 return off;
4101}
4102
4103
4104/**
4105 * Flush the given set of guest registers if marked as dirty.
4106 *
4107 * @returns New code buffer offset.
4108 * @param pReNative The native recompile state.
4109 * @param off Current code buffer position.
4110 * @param fFlushGstReg The guest register set to flush (default is flush everything).
4111 */
4112DECL_HIDDEN_THROW(uint32_t)
4113iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
4114{
4115 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
4116 if (bmGstRegShadowDirty)
4117 {
4118# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4119 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4120 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
4121# endif
4122 do
4123 {
4124 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
4125 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
4126 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4127 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4128 } while (bmGstRegShadowDirty);
4129 }
4130
4131 return off;
4132}
4133
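/*
 * Example (illustrative): to write back only a dirty shadow of guest RAX one
 * could pass a single-bit mask, while the default (UINT64_MAX) flushes every
 * dirty shadow:
 *
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off,
 *                                        RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX));
 */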
4134
4135/**
4136 * Flush all shadowed guest registers marked as dirty for the given host register.
4137 *
4138 * @returns New code buffer offset.
4139 * @param pReNative The native recompile state.
4140 * @param off Current code buffer position.
4141 * @param idxHstReg The host register.
4142 *
4143 * @note This doesn't do any unshadowing of guest registers from the host register.
4144 */
4145DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
4146{
4147 /* We need to flush any pending guest register writes this host register shadows. */
4148 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4149 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
4150 {
4151# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4152 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4153 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
4154# endif
4155 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
4156 * likely to only have a single bit set. It'll be in the 0..15 range,
4157 * but still it's 15 unnecessary loops for the last guest register. */
4158
4159 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
4160 do
4161 {
4162 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
4163 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
4164 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4165 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4166 } while (bmGstRegShadowDirty);
4167 }
4168
4169 return off;
4170}
4171#endif
4172
4173
4174/**
4175 * Locate a register, possibly freeing one up.
4176 *
4177 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4178 * failed.
4179 *
4180 * @returns Host register number on success. Returns UINT8_MAX if no registers
4181 * found, the caller is supposed to deal with this and raise an
4182 * allocation type specific status code (if desired).
4183 *
4184 * @throws VBox status code if we run into trouble spilling a variable or
4185 * recording debug info. Does NOT throw anything if we're out of
4186 * registers, though.
4187 */
4188static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4189 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4190{
4191 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4192 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4193 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4194
4195 /*
4196 * Try a freed register that's shadowing a guest register.
4197 */
4198 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4199 if (fRegs)
4200 {
4201 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4202
4203#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4204 /*
4205 * When we have liveness information, we use it to kick out all shadowed
4206 * guest registers that will not be needed any more in this TB. If we're
4207 * lucky, this may prevent us from ending up here again.
4208 *
4209 * Note! We must consider the previous entry here so we don't free
4210 * anything that the current threaded function requires (current
4211 * entry is produced by the next threaded function).
4212 */
4213 uint32_t const idxCurCall = pReNative->idxCurCall;
4214 if (idxCurCall > 0)
4215 {
4216 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4217
4218# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4219 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4220 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4221 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
4222# else
4223 /* Construct a mask of the registers not in the read or write state.
4224 Note! We could skip writes, if they aren't from us, as this is just
4225 a hack to prevent trashing registers that have just been written
4226 or will be written when we retire the current instruction. */
4227 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4228 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4229 & IEMLIVENESSBIT_MASK;
4230# endif
4231 /* Merge EFLAGS. */
4232 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4233 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4234 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4235 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4236 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
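 /* I.e. the three shifts above AND together the seven EFLAGS liveness group
    bits (Other, CF, PF, AF, ZF, SF, OF), so the single bit left at the
    kIemNativeGstReg_EFlags position is only set when every group is freeable. */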
4237
4238 /* If it matches any shadowed registers. */
4239 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4240 {
4241#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4242 /* Writeback any dirty shadow registers we are about to unshadow. */
4243 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
4244#endif
4245
4246 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4247 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4248 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4249
4250 /* See if we've got any unshadowed registers we can return now. */
4251 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4252 if (fUnshadowedRegs)
4253 {
4254 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4255 return (fPreferVolatile
4256 ? ASMBitFirstSetU32(fUnshadowedRegs)
4257 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4258 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4259 - 1;
4260 }
4261 }
4262 }
4263#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4264
4265 unsigned const idxReg = (fPreferVolatile
4266 ? ASMBitFirstSetU32(fRegs)
4267 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4268 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4269 - 1;
4270
4271 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4272 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4273 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4274 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4275
4276#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4277 /* We need to flush any pending guest register writes this host register shadows. */
4278 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
4279#endif
4280
4281 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4282 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4283 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4284 return idxReg;
4285 }
4286
4287 /*
4288 * Try free up a variable that's in a register.
4289 *
4290 * We do two rounds here, first evacuating variables that don't need to be
4291 * saved on the stack, then in the second round moving things to the stack.
4292 */
4293 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4294 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4295 {
4296 uint32_t fVars = pReNative->Core.bmVars;
4297 while (fVars)
4298 {
4299 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4300 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4301#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4302 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
4303 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit before continuing so the loop terminates. */
4304#endif
4305
4306 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4307 && (RT_BIT_32(idxReg) & fRegMask)
4308 && ( iLoop == 0
4309 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4310 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4311 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4312 {
4313 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4314 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4315 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4316 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4317 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4318 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4319#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4320 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4321#endif
4322
4323 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4324 {
4325 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4326 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4327 }
4328
4329 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4330 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4331
4332 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4333 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4334 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4335 return idxReg;
4336 }
4337 fVars &= ~RT_BIT_32(idxVar);
4338 }
4339 }
4340
4341 return UINT8_MAX;
4342}
4343
4344
4345/**
4346 * Reassigns a variable to a different register specified by the caller.
4347 *
4348 * @returns The new code buffer position.
4349 * @param pReNative The native recompile state.
4350 * @param off The current code buffer position.
4351 * @param idxVar The variable index.
4352 * @param idxRegOld The old host register number.
4353 * @param idxRegNew The new host register number.
4354 * @param pszCaller The caller for logging.
4355 */
4356static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4357 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4358{
4359 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4360 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4361#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4362 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4363#endif
4364 RT_NOREF(pszCaller);
4365
4366#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4367 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4368#endif
4369 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4370
4371 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4372#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4373 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4374#endif
4375 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4376 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4377 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4378
4379 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4380 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4381 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4382 if (fGstRegShadows)
4383 {
4384 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4385 | RT_BIT_32(idxRegNew);
4386 while (fGstRegShadows)
4387 {
4388 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4389 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4390
4391 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4392 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4393 }
4394 }
4395
4396 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4397 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4398 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4399 return off;
4400}
4401
4402
4403/**
4404 * Moves a variable to a different register or spills it onto the stack.
4405 *
4406 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4407 * kinds can easily be recreated if needed later.
4408 *
4409 * @returns The new code buffer position.
4410 * @param pReNative The native recompile state.
4411 * @param off The current code buffer position.
4412 * @param idxVar The variable index.
4413 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4414 * call-volatile registers.
4415 */
4416DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4417 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4418{
4419 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4420 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4421 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4422 Assert(!pVar->fRegAcquired);
4423
4424 uint8_t const idxRegOld = pVar->idxReg;
4425 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4426 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4427 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4428 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4429 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4430 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4431 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4432 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4433#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4434 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4435#endif
4436
4437
4438 /** @todo Add statistics on this.*/
4439 /** @todo Implement basic variable liveness analysis (python) so variables
4440 * can be freed immediately once no longer used. Without this we risk
4441 * trashing registers and stack slots on dead variables.
4442 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4443
4444 /*
4445 * First try move it to a different register, as that's cheaper.
4446 */
4447 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4448 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4449 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4450 if (fRegs)
4451 {
4452 /* Avoid using shadow registers, if possible. */
4453 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4454 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4455 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4456 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4457 }
4458
4459 /*
4460 * Otherwise we must spill the register onto the stack.
4461 */
4462 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4463 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4464 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4465 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4466
4467 pVar->idxReg = UINT8_MAX;
4468 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4469 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4470 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4471 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4472 return off;
4473}
4474
4475
4476/**
4477 * Allocates a temporary host general purpose register.
4478 *
4479 * This may emit code to save register content onto the stack in order to free
4480 * up a register.
4481 *
4482 * @returns The host register number; throws VBox status code on failure,
4483 * so no need to check the return value.
4484 * @param pReNative The native recompile state.
4485 * @param poff Pointer to the variable with the code buffer position.
4486 * This will be updated if we need to move a variable from
4487 * register to stack in order to satisfy the request.
4488 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4489 * registers (@c true, default) or the other way around
4490 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4491 */
4492DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4493{
4494 /*
4495 * Try find a completely unused register, preferably a call-volatile one.
4496 */
4497 uint8_t idxReg;
4498 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4499 & ~pReNative->Core.bmHstRegsWithGstShadow
4500 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4501 if (fRegs)
4502 {
4503 if (fPreferVolatile)
4504 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4505 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4506 else
4507 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4508 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4509 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4510 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4511 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4512 }
4513 else
4514 {
4515 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4516 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4517 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4518 }
4519 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4520}
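/*
 * Illustrative usage sketch, not part of the original sources: the typical
 * allocate / emit / free pattern for a temporary host register.  The wrapper
 * name iemNativeExampleLoadConstant is hypothetical; iemNativeRegAllocTmp,
 * iemNativeEmitLoadGprImm64 and iemNativeRegFreeTmp are the helpers defined
 * in this file.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleLoadConstant(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* May emit spill code and advance 'off' if no register is completely free. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    /* Use the register as scratch space. */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x42));
    /* Release it again; any guest shadows (none for a plain tmp) are left alone. */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif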
4521
4522
4523/**
4524 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4525 * registers.
4526 *
4527 * @returns The host register number; throws VBox status code on failure,
4528 * so no need to check the return value.
4529 * @param pReNative The native recompile state.
4530 * @param poff Pointer to the variable with the code buffer position.
4531 * This will be updated if we need to move a variable from
4532 * register to stack in order to satisfy the request.
4533 * @param fRegMask Mask of acceptable registers.
4534 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4535 * registers (@c true, default) or the other way around
4536 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4537 */
4538DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4539 bool fPreferVolatile /*= true*/)
4540{
4541 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4542 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4543
4544 /*
4545 * Try find a completely unused register, preferably a call-volatile one.
4546 */
4547 uint8_t idxReg;
4548 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4549 & ~pReNative->Core.bmHstRegsWithGstShadow
4550 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4551 & fRegMask;
4552 if (fRegs)
4553 {
4554 if (fPreferVolatile)
4555 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4556 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4557 else
4558 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4559 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4560 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4561 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4562 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4563 }
4564 else
4565 {
4566 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4567 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4568 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4569 }
4570 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4571}
4572
4573
4574/**
4575 * Allocates a temporary register for loading an immediate value into.
4576 *
4577 * This will emit code to load the immediate, unless there happens to be an
4578 * unused register with the value already loaded.
4579 *
4580 * The caller will not modify the returned register, it must be considered
4581 * read-only. Free using iemNativeRegFreeTmpImm.
4582 *
4583 * @returns The host register number; throws VBox status code on failure, so no
4584 * need to check the return value.
4585 * @param pReNative The native recompile state.
4586 * @param poff Pointer to the variable with the code buffer position.
4587 * @param uImm The immediate value that the register must hold upon
4588 * return.
4589 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4590 * registers (@c true, default) or the other way around
4591 * (@c false).
4592 *
4593 * @note Reusing immediate values has not been implemented yet.
4594 */
4595DECL_HIDDEN_THROW(uint8_t)
4596iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4597{
4598 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4599 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4600 return idxReg;
4601}
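/*
 * Illustrative usage sketch, not part of the original sources: an immediate
 * register is read-only for the caller and should be released with
 * iemNativeRegFreeTmpImm so a future implementation could start reusing the
 * loaded value.  The wrapper name is hypothetical.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleUseImm(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... emit code that only reads idxImmReg ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg); /* assumes the value was not modified */
    return off;
}
#endif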
4602
4603
4604/**
4605 * Allocates a temporary host general purpose register for keeping a guest
4606 * register value.
4607 *
4608 * Since we may already have a register holding the guest register value,
4609 * code will be emitted to do the loading if that's not the case. Code may also
4610 * be emitted if we have to free up a register to satisfy the request.
4611 *
4612 * @returns The host register number; throws VBox status code on failure, so no
4613 * need to check the return value.
4614 * @param pReNative The native recompile state.
4615 * @param poff Pointer to the variable with the code buffer
4616 * position. This will be updated if we need to move a
4617 * variable from register to stack in order to satisfy
4618 * the request.
4619 * @param enmGstReg The guest register that is to be updated.
4620 * @param enmIntendedUse How the caller will be using the host register.
4621 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4622 * register is okay (default). The ASSUMPTION here is
4623 * that the caller has already flushed all volatile
4624 * registers, so this is only applied if we allocate a
4625 * new register.
4626 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4627 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4628 */
4629DECL_HIDDEN_THROW(uint8_t)
4630iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4631 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4632 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4633{
4634 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4635#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4636 AssertMsg( fSkipLivenessAssert
4637 || pReNative->idxCurCall == 0
4638 || enmGstReg == kIemNativeGstReg_Pc
4639 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4640 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4641 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4642 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4643 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4644 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4645#endif
4646 RT_NOREF(fSkipLivenessAssert);
4647#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4648 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4649#endif
4650 uint32_t const fRegMask = !fNoVolatileRegs
4651 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4652 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4653
4654 /*
4655 * First check if the guest register value is already in a host register.
4656 */
4657 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4658 {
4659 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4660 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4661 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4662 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4663
4664 /* It's not supposed to be allocated... */
4665 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4666 {
4667 /*
4668 * If the register will trash the guest shadow copy, try find a
4669 * completely unused register we can use instead. If that fails,
4670 * we need to disassociate the host reg from the guest reg.
4671 */
4672 /** @todo would be nice to know if preserving the register is in any way helpful. */
4673 /* If the purpose is calculations, try duplicate the register value as
4674 we'll be clobbering the shadow. */
4675 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4676 && ( ~pReNative->Core.bmHstRegs
4677 & ~pReNative->Core.bmHstRegsWithGstShadow
4678 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4679 {
4680 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4681
4682 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4683
4684 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4685 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4686 g_apszIemNativeHstRegNames[idxRegNew]));
4687 idxReg = idxRegNew;
4688 }
4689 /* If the current register matches the restrictions, go ahead and allocate
4690 it for the caller. */
4691 else if (fRegMask & RT_BIT_32(idxReg))
4692 {
4693 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4694 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4695 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4696 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4697 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4698 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4699 else
4700 {
4701 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4702 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4703 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4704 }
4705 }
4706 /* Otherwise, allocate a register that satisfies the caller and transfer
4707 the shadowing if compatible with the intended use. (This basically
4708 means the call wants a non-volatile register (RSP push/pop scenario).) */
4709 else
4710 {
4711 Assert(fNoVolatileRegs);
4712 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4713 !fNoVolatileRegs
4714 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4715 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4716 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4717 {
4718 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4719 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4720 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4721 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4722 }
4723 else
4724 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4725 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4726 g_apszIemNativeHstRegNames[idxRegNew]));
4727 idxReg = idxRegNew;
4728 }
4729 }
4730 else
4731 {
4732 /*
4733 * Oops. Shadowed guest register already allocated!
4734 *
4735 * Allocate a new register, copy the value and, if updating, the
4736 * guest shadow copy assignment to the new register.
4737 */
4738 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4739 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4740 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4741 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4742
4743 /** @todo share register for readonly access. */
4744 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4745 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4746
4747 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4748 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4749
4750 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4751 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4752 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4753 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4754 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4755 else
4756 {
4757 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4758 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4759 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4760 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4761 }
4762 idxReg = idxRegNew;
4763 }
4764 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4765
4766#ifdef VBOX_STRICT
4767 /* Strict builds: Check that the value is correct. */
4768 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4769#endif
4770
4771#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4772 /** @todo r=aeichner Implement for registers other than GPR as well. */
4773 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4774 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4775 && enmGstReg >= kIemNativeGstReg_GprFirst
4776 && enmGstReg <= kIemNativeGstReg_GprLast
4777 )
4778 {
4779# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4780 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
4781 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
4782# endif
4783 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4784 }
4785#endif
4786
4787 return idxReg;
4788 }
4789
4790 /*
4791 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4792 */
4793 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4794
4795 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4796 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4797
4798 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4799 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4800 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4801 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4802
4803#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4804 /** @todo r=aeichner Implement for registers other than GPR as well. */
4805 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4806 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4807 && enmGstReg >= kIemNativeGstReg_GprFirst
4808 && enmGstReg <= kIemNativeGstReg_GprLast
4809 )
4810 {
4811# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4812 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
4813 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
4814# endif
4815 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4816 }
4817#endif
4818
4819 return idxRegNew;
4820}
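/*
 * Illustrative usage sketch, not part of the original sources: fetching a
 * guest register (the PC here) through the shadowing allocator for read-only
 * use.  kIemNativeGstReg_Pc and kIemNativeGstRegUse_ReadOnly are the values
 * referenced above; the wrapper name is hypothetical.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleReadPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Reuses an existing shadow copy if present, otherwise emits a load from CPUMCTX. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ReadOnly);
    /* ... emit code reading idxPcReg ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif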
4821
4822
4823/**
4824 * Allocates a temporary host general purpose register that already holds the
4825 * given guest register value.
4826 *
4827 * The use case for this function is places where the shadowing state cannot be
4828 * modified due to branching and such. This will fail if we don't have a
4829 * current shadow copy handy or if it's incompatible. The only code that will
4830 * be emitted here is value checking code in strict builds.
4831 *
4832 * The intended use can only be readonly!
4833 *
4834 * @returns The host register number, UINT8_MAX if not present.
4835 * @param pReNative The native recompile state.
4836 * @param poff Pointer to the instruction buffer offset.
4837 * Will be updated in strict builds if a register is
4838 * found.
4839 * @param enmGstReg The guest register that is to be fetched.
4840 * @note In strict builds, this may throw instruction buffer growth failures.
4841 * Non-strict builds will not throw anything.
4842 * @sa iemNativeRegAllocTmpForGuestReg
4843 */
4844DECL_HIDDEN_THROW(uint8_t)
4845iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4846{
4847 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4848#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4849 AssertMsg( pReNative->idxCurCall == 0
4850 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4851 || enmGstReg == kIemNativeGstReg_Pc,
4852 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4853#endif
4854
4855 /*
4856 * First check if the guest register value is already in a host register.
4857 */
4858 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4859 {
4860 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4861 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4862 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4863 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4864
4865 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4866 {
4867 /*
4868 * We only do readonly use here, so easy compared to the other
4869 * variant of this code.
4870 */
4871 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4872 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4873 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4874 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4875 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4876
4877#ifdef VBOX_STRICT
4878 /* Strict builds: Check that the value is correct. */
4879 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4880#else
4881 RT_NOREF(poff);
4882#endif
4883 return idxReg;
4884 }
4885 }
4886
4887 return UINT8_MAX;
4888}
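/*
 * Illustrative usage sketch, not part of the original sources: unlike the
 * allocator above, this variant may return UINT8_MAX, so the caller needs a
 * fallback path.  The wrapper name is hypothetical.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleMaybeUsePc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... read-only use of the existing shadow copy ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
    {
        /* ... fall back to loading the value from CPUMCTX without touching the shadow state ... */
    }
    return off;
}
#endif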
4889
4890
4891/**
4892 * Allocates argument registers for a function call.
4893 *
4894 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4895 * need to check the return value.
4896 * @param pReNative The native recompile state.
4897 * @param off The current code buffer offset.
4898 * @param cArgs The number of arguments the function call takes.
4899 */
4900DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4901{
4902 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4903 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4904 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4905 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4906
4907 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4908 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4909 else if (cArgs == 0)
4910 return off;
4911
4912 /*
4913 * Do we get lucky and all registers are free and not shadowing anything?
4914 */
4915 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4916 for (uint32_t i = 0; i < cArgs; i++)
4917 {
4918 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4919 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4920 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4921 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4922 }
4923 /*
4924 * Okay, not lucky so we have to free up the registers.
4925 */
4926 else
4927 for (uint32_t i = 0; i < cArgs; i++)
4928 {
4929 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4930 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4931 {
4932 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4933 {
4934 case kIemNativeWhat_Var:
4935 {
4936 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4937 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4938 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4939 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4940 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4941#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4942 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4943#endif
4944
4945 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4946 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4947 else
4948 {
4949 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4950 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4951 }
4952 break;
4953 }
4954
4955 case kIemNativeWhat_Tmp:
4956 case kIemNativeWhat_Arg:
4957 case kIemNativeWhat_rc:
4958 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4959 default:
4960 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4961 }
4962
4963 }
4964 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4965 {
4966 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4967 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4968 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4969#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4970 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4971#endif
4972 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4973 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4974 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4975 }
4976 else
4977 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4978 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4979 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4980 }
4981 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4982 return off;
4983}
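/*
 * Illustrative usage sketch, not part of the original sources: reserving the
 * first two argument registers before emitting a helper call.  The argument
 * loading and the call emission itself are elided; the wrapper name is
 * hypothetical.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleReserveArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Frees or spills whatever currently occupies the first two argument registers. */
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
    /* ... load the argument registers and emit the helper call ... */
    return off;
}
#endif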
4984
4985
4986DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4987
4988
4989#if 0
4990/**
4991 * Frees a register assignment of any type.
4992 *
4993 * @param pReNative The native recompile state.
4994 * @param idxHstReg The register to free.
4995 *
4996 * @note Does not update variables.
4997 */
4998DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4999{
5000 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5001 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5002 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
5003 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
5004 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
5005 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
5006 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
5007 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
5008 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
5009 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
5010 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5011 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5012 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
5013 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5014
5015 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5016 /* no flushing, right:
5017 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5018 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5019 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5020 */
5021}
5022#endif
5023
5024
5025/**
5026 * Frees a temporary register.
5027 *
5028 * Any shadow copies of guest registers assigned to the host register will not
5029 * be flushed by this operation.
5030 */
5031DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5032{
5033 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5034 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
5035 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5036 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
5037 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5038}
5039
5040
5041/**
5042 * Frees a temporary immediate register.
5043 *
5044 * It is assumed that the caller has not modified the register, so it still holds
5045 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
5046 */
5047DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5048{
5049 iemNativeRegFreeTmp(pReNative, idxHstReg);
5050}
5051
5052
5053/**
5054 * Frees a register assigned to a variable.
5055 *
5056 * The register will be disassociated from the variable.
5057 */
5058DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5059{
5060 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5061 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5062 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
5063 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5064 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5065#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5066 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5067#endif
5068
5069 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5070 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5071 if (!fFlushShadows)
5072 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5073 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
5074 else
5075 {
5076 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5077 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5078#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5079 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
5080#endif
5081 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5082 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
5083 uint64_t fGstRegShadows = fGstRegShadowsOld;
5084 while (fGstRegShadows)
5085 {
5086 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5087 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5088
5089 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
5090 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
5091 }
5092 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5093 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5094 }
5095}
5096
5097
5098#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5099# ifdef LOG_ENABLED
5100/** Host CPU SIMD register names. */
5101DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5102{
5103# ifdef RT_ARCH_AMD64
5104 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5105# elif defined(RT_ARCH_ARM64)
5106 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5107 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5108# else
5109# error "port me"
5110# endif
5111};
5112# endif
5113
5114
5115/**
5116 * Frees a SIMD register assigned to a variable.
5117 *
5118 * The register will be disassociated from the variable.
5119 */
5120DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5121{
5122 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
5123 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5124 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
5125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5126 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5127 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5128
5129 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5130 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
5131 if (!fFlushShadows)
5132 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5133 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
5134 else
5135 {
5136 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5137 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
5138 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5139 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
5140 uint64_t fGstRegShadows = fGstRegShadowsOld;
5141 while (fGstRegShadows)
5142 {
5143 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5144 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5145
5146 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
5147 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
5148 }
5149 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5150 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5151 }
5152}
5153
5154
5155/**
5156 * Reassigns a variable to a different SIMD register specified by the caller.
5157 *
5158 * @returns The new code buffer position.
5159 * @param pReNative The native recompile state.
5160 * @param off The current code buffer position.
5161 * @param idxVar The variable index.
5162 * @param idxRegOld The old host register number.
5163 * @param idxRegNew The new host register number.
5164 * @param pszCaller The caller for logging.
5165 */
5166static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
5167 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
5168{
5169 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5170 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
5171 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5172 RT_NOREF(pszCaller);
5173
5174 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5175 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
5176 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
5177
5178 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
5179 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5180 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5181
5182 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
5183 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
5185
5186 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
5187 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
5188 else
5189 {
5190 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
5191 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
5192 }
5193
5194 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
5195 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
5196 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
5197 if (fGstRegShadows)
5198 {
5199 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
5200 | RT_BIT_32(idxRegNew);
5201 while (fGstRegShadows)
5202 {
5203 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5204 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5205
5206 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
5207 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
5208 }
5209 }
5210
5211 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
5212 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
5213 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
5214 return off;
5215}
5216
5217
5218/**
5219 * Moves a variable to a different register or spills it onto the stack.
5220 *
5221 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
5222 * kinds can easily be recreated if needed later.
5223 *
5224 * @returns The new code buffer position.
5225 * @param pReNative The native recompile state.
5226 * @param off The current code buffer position.
5227 * @param idxVar The variable index.
5228 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
5229 * call-volatile registers.
5230 */
5231DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
5232 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
5233{
5234 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5235 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5236 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
5237 Assert(!pVar->fRegAcquired);
5238 Assert(!pVar->fSimdReg);
5239
5240 uint8_t const idxRegOld = pVar->idxReg;
5241 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5242 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
5243 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
5244 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
5245 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
5246 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5247 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
5248 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5249 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5250 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5251
5252 /** @todo Add statistics on this.*/
5253 /** @todo Implement basic variable liveness analysis (python) so variables
5254 * can be freed immediately once no longer used. This has the potential to
5255 * be trashing registers and stack for dead variables.
5256 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
5257
5258 /*
5259 * First try move it to a different register, as that's cheaper.
5260 */
5261 fForbiddenRegs |= RT_BIT_32(idxRegOld);
5262 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
5263 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
5264 if (fRegs)
5265 {
5266 /* Avoid using shadow registers, if possible. */
5267 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
5268 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
5269 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
5270 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
5271 }
5272
5273 /*
5274 * Otherwise we must spill the register onto the stack.
5275 */
5276 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
5277 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
5278 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
5279
5280 if (pVar->cbVar == sizeof(RTUINT128U))
5281 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
5282 else
5283 {
5284 Assert(pVar->cbVar == sizeof(RTUINT256U));
5285 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
5286 }
5287
5288 pVar->idxReg = UINT8_MAX;
5289 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
5290 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
5291 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
5292 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
5293 return off;
5294}
5295
5296
5297/**
5298 * Called right before emitting a call instruction to move anything important
5299 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
5300 * optionally freeing argument variables.
5301 *
5302 * @returns New code buffer offset, UINT32_MAX on failure.
5303 * @param pReNative The native recompile state.
5304 * @param off The code buffer offset.
5305 * @param cArgs The number of arguments the function call takes.
5306 * It is presumed that the host register part of these have
5307 * been allocated as such already and won't need moving,
5308 * just freeing.
5309 * @param fKeepVars Mask of variables that should keep their register
5310 * assignments. Caller must take care to handle these.
5311 */
5312DECL_HIDDEN_THROW(uint32_t)
5313iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5314{
5315 Assert(!cArgs); RT_NOREF(cArgs);
5316
5317 /* fKeepVars will reduce this mask. */
5318 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5319
5320 /*
5321 * Move anything important out of volatile registers.
5322 */
5323 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5324#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
5325 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
5326#endif
5327 ;
5328
5329 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
5330 if (!fSimdRegsToMove)
5331 { /* likely */ }
5332 else
5333 {
5334 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
5335 while (fSimdRegsToMove != 0)
5336 {
5337 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
5338 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
5339
5340 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
5341 {
5342 case kIemNativeWhat_Var:
5343 {
5344 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
5345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5346 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5347 Assert(pVar->idxReg == idxSimdReg);
5348 Assert(pVar->fSimdReg);
5349 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5350 {
5351 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
5352 idxVar, pVar->enmKind, pVar->idxReg));
5353 if (pVar->enmKind != kIemNativeVarKind_Stack)
5354 pVar->idxReg = UINT8_MAX;
5355 else
5356 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
5357 }
5358 else
5359 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
5360 continue;
5361 }
5362
5363 case kIemNativeWhat_Arg:
5364 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
5365 continue;
5366
5367 case kIemNativeWhat_rc:
5368 case kIemNativeWhat_Tmp:
5369 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
5370 continue;
5371
5372 case kIemNativeWhat_FixedReserved:
5373#ifdef RT_ARCH_ARM64
5374 continue; /* On ARM the upper half of the virtual 256-bit register. */
5375#endif
5376
5377 case kIemNativeWhat_FixedTmp:
5378 case kIemNativeWhat_pVCpuFixed:
5379 case kIemNativeWhat_pCtxFixed:
5380 case kIemNativeWhat_PcShadow:
5381 case kIemNativeWhat_Invalid:
5382 case kIemNativeWhat_End:
5383 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5384 }
5385 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5386 }
5387 }
5388
5389 /*
5390 * Do the actual freeing.
5391 */
5392 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
5393 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
5394 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
5395 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
5396
5397 /* If there are guest register shadows in any call-volatile register, we
5398 have to clear the corresponding guest register masks for each register. */
5399 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
5400 if (fHstSimdRegsWithGstShadow)
5401 {
5402 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5403 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
5404 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
5405 do
5406 {
5407 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
5408 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
5409
5410 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
5411
5412#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5413 /*
5414 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
5415 * to call volatile registers).
5416 */
5417 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5418 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
5419 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
5420#endif
5421 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5422 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
5423
5424 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
5425 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5426 } while (fHstSimdRegsWithGstShadow != 0);
5427 }
5428
5429 return off;
5430}
5431#endif
5432
5433
5434/**
5435 * Called right before emitting a call instruction to move anything important
5436 * out of call-volatile registers, free and flush the call-volatile registers,
5437 * optionally freeing argument variables.
5438 *
5439 * @returns New code buffer offset, UINT32_MAX on failure.
5440 * @param pReNative The native recompile state.
5441 * @param off The code buffer offset.
5442 * @param cArgs The number of arguments the function call takes.
5443 * It is presumed that the host register part of these have
5444 * been allocated as such already and won't need moving,
5445 * just freeing.
5446 * @param fKeepVars Mask of variables that should keep their register
5447 * assignments. Caller must take care to handle these.
5448 */
5449DECL_HIDDEN_THROW(uint32_t)
5450iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5451{
5452 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5453
5454 /* fKeepVars will reduce this mask. */
5455 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5456
5457 /*
5458 * Move anything important out of volatile registers.
5459 */
5460 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
5461 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
5462 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
5463#ifdef IEMNATIVE_REG_FIXED_TMP0
5464 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
5465#endif
5466#ifdef IEMNATIVE_REG_FIXED_TMP1
5467 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
5468#endif
5469#ifdef IEMNATIVE_REG_FIXED_PC_DBG
5470 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
5471#endif
5472 & ~g_afIemNativeCallRegs[cArgs];
5473
5474 fRegsToMove &= pReNative->Core.bmHstRegs;
5475 if (!fRegsToMove)
5476 { /* likely */ }
5477 else
5478 {
5479 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
5480 while (fRegsToMove != 0)
5481 {
5482 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
5483 fRegsToMove &= ~RT_BIT_32(idxReg);
5484
5485 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
5486 {
5487 case kIemNativeWhat_Var:
5488 {
5489 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
5490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5491 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5492 Assert(pVar->idxReg == idxReg);
5493#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5494 Assert(!pVar->fSimdReg);
5495#endif
5496 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5497 {
5498 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
5499 idxVar, pVar->enmKind, pVar->idxReg));
5500 if (pVar->enmKind != kIemNativeVarKind_Stack)
5501 pVar->idxReg = UINT8_MAX;
5502 else
5503 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5504 }
5505 else
5506 fRegsToFree &= ~RT_BIT_32(idxReg);
5507 continue;
5508 }
5509
5510 case kIemNativeWhat_Arg:
5511 AssertMsgFailed(("What?!?: %u\n", idxReg));
5512 continue;
5513
5514 case kIemNativeWhat_rc:
5515 case kIemNativeWhat_Tmp:
5516 AssertMsgFailed(("Missing free: %u\n", idxReg));
5517 continue;
5518
5519 case kIemNativeWhat_FixedTmp:
5520 case kIemNativeWhat_pVCpuFixed:
5521 case kIemNativeWhat_pCtxFixed:
5522 case kIemNativeWhat_PcShadow:
5523 case kIemNativeWhat_FixedReserved:
5524 case kIemNativeWhat_Invalid:
5525 case kIemNativeWhat_End:
5526 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5527 }
5528 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5529 }
5530 }
5531
5532 /*
5533 * Do the actual freeing.
5534 */
5535 if (pReNative->Core.bmHstRegs & fRegsToFree)
5536 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5537 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5538 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5539
5540 /* If there are guest register shadows in any call-volatile register, we
5541 have to clear the corresponding guest register masks for each register. */
5542 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5543 if (fHstRegsWithGstShadow)
5544 {
5545 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5546 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5547 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5548 do
5549 {
5550 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5551 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5552
5553 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5554
5555#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5556 /*
5557 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
5558 * to call volatile registers).
5559 */
5560 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
5561 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
5562 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
5563#endif
5564
5565 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5566 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5567 } while (fHstRegsWithGstShadow != 0);
5568 }
5569
5570#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5571 /* Now for the SIMD registers, no argument support for now. */
5572 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
5573#endif
5574
5575 return off;
5576}
5577
5578
5579/**
5580 * Flushes a set of guest register shadow copies.
5581 *
5582 * This is usually done after calling a threaded function or a C-implementation
5583 * of an instruction.
5584 *
5585 * @param pReNative The native recompile state.
5586 * @param fGstRegs Set of guest registers to flush.
5587 */
5588DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5589{
5590 /*
5591 * Reduce the mask by what's currently shadowed
5592 */
5593 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5594 fGstRegs &= bmGstRegShadowsOld;
5595 if (fGstRegs)
5596 {
5597 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5598 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5599 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5600 if (bmGstRegShadowsNew)
5601 {
5602 /*
5603 * Partial.
5604 */
5605 do
5606 {
5607 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5608 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5609 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5610 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5611 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5612#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5613 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5614#endif
5615
5616 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5617 fGstRegs &= ~fInThisHstReg;
5618 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5619 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5620 if (!fGstRegShadowsNew)
5621 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5622 } while (fGstRegs != 0);
5623 }
5624 else
5625 {
5626 /*
5627 * Clear all.
5628 */
5629 do
5630 {
5631 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5632 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5633 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5634 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5635 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5636#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5637 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5638#endif
5639
5640 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5641 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5642 } while (fGstRegs != 0);
5643 pReNative->Core.bmHstRegsWithGstShadow = 0;
5644 }
5645 }
5646}
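/*
 * Illustrative usage sketch, not part of the original sources: after emitting
 * a call to a threaded function or C-implementation that may modify guest
 * state, the affected shadow copies are dropped.  Passing UINT64_MAX simply
 * invalidates every current shadow; the wrapper name is hypothetical.
 */
#if 0 /* example only */
static void iemNativeExampleDropShadowsAfterCall(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
}
#endif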
5647
5648
5649/**
5650 * Flushes guest register shadow copies held by a set of host registers.
5651 *
5652 * This is used with the TLB lookup code for ensuring that we don't carry on
5653 * with any guest shadows in volatile registers, as these will get corrupted by
5654 * a TLB miss.
5655 *
5656 * @param pReNative The native recompile state.
5657 * @param fHstRegs Set of host registers to flush guest shadows for.
5658 */
5659DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5660{
5661 /*
5662 * Reduce the mask by what's currently shadowed.
5663 */
5664 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5665 fHstRegs &= bmHstRegsWithGstShadowOld;
5666 if (fHstRegs)
5667 {
5668 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5669 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5670 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5671 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5672 if (bmHstRegsWithGstShadowNew)
5673 {
5674 /*
5675 * Partial (likely).
5676 */
5677 uint64_t fGstShadows = 0;
5678 do
5679 {
5680 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5681 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5682 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5683 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5684#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5685 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5686#endif
5687
5688 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5689 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5690 fHstRegs &= ~RT_BIT_32(idxHstReg);
5691 } while (fHstRegs != 0);
5692 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5693 }
5694 else
5695 {
5696 /*
5697 * Clear all.
5698 */
5699 do
5700 {
5701 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5702 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5703 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5704 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5705#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5706 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5707#endif
5708
5709 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5710 fHstRegs &= ~RT_BIT_32(idxHstReg);
5711 } while (fHstRegs != 0);
5712 pReNative->Core.bmGstRegShadows = 0;
5713 }
5714 }
5715}
5716
5717
5718/**
5719 * Restores guest shadow copies in volatile registers.
5720 *
5721 * This is used after calling a helper function (think TLB miss) to restore the
5722 * register state of volatile registers.
5723 *
5724 * @param pReNative The native recompile state.
5725 * @param off The code buffer offset.
5726 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5727 * be active (allocated) w/o asserting. Hack.
5728 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5729 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5730 */
5731DECL_HIDDEN_THROW(uint32_t)
5732iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5733{
5734 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5735 if (fHstRegs)
5736 {
5737 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5738 do
5739 {
5740 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5741
5742 /* It's not fatal if a register is active holding a variable that
5743 is shadowing a guest register, ASSUMING all pending guest register
5744 writes were flushed prior to the helper call. However, we'll be
5745 emitting duplicate restores, so it wastes code space. */
5746 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5747 RT_NOREF(fHstRegsActiveShadows);
5748
5749 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5750#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5751 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
5752#endif
5753 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5754 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5755 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5756
5757 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5758 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5759
5760 fHstRegs &= ~RT_BIT_32(idxHstReg);
5761 } while (fHstRegs != 0);
5762 }
5763 return off;
5764}
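
/*
 * Illustrative usage sketch (not part of the recompiler itself): restoring the
 * volatile shadows right after emitting a call to an external helper, assuming
 * all pending guest register writes were flushed before the call and that no
 * host register is allowed to hold an active shadow (hence the 0 for
 * fHstRegsActiveShadows):
 *
 *      // ... emit the helper call; the call-volatile host registers are now considered trashed ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 *
 * The fHstRegsActiveShadows hack described above is only needed when the caller
 * deliberately keeps a variable in a volatile register across the call.
 */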
5765
5766
5767
5768
5769/*********************************************************************************************************************************
5770* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5771*********************************************************************************************************************************/
5772#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5773
5774/**
5775 * Info about shadowed guest SIMD register values.
5776 * @see IEMNATIVEGSTSIMDREG
5777 */
5778static struct
5779{
5780 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5781 uint32_t offXmm;
5782 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5783 uint32_t offYmm;
5784 /** Name (for logging). */
5785 const char *pszName;
5786} const g_aGstSimdShadowInfo[] =
5787{
5788#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5789 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5790 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5791 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5792 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5793 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5794 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5795 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5796 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5797 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5798 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5799 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5800 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5801 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5802 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5803 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5804 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5805 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5806#undef CPUMCTX_OFF_AND_SIZE
5807};
5808AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5809
5810
5811/**
5812 * Frees a temporary SIMD register.
5813 *
5814 * Any shadow copies of guest registers assigned to the host register will not
5815 * be flushed by this operation.
5816 */
5817DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5818{
5819 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5820 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5821 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5822 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5823 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5824}
5825
5826
5827/**
5828 * Emits code to flush a pending write of the given SIMD register, if any; this also flushes the guest to host SIMD register association.
5829 *
5830 * @returns New code buffer offset.
5831 * @param pReNative The native recompile state.
5832 * @param off Current code buffer position.
5833 * @param enmGstSimdReg The guest SIMD register to flush.
5834 */
5835DECL_HIDDEN_THROW(uint32_t)
5836iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5837{
5838 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5839
5840 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5841 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5842 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5843 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5844
5845 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5846 {
5847 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5848 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5849 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5850 }
5851
5852 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5853 {
5854 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5855 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5856 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5857 }
5858
5859 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5860 return off;
5861}
5862
5863
5864/**
5865 * Flush the given set of guest SIMD registers if marked as dirty.
5866 *
5867 * @returns New code buffer offset.
5868 * @param pReNative The native recompile state.
5869 * @param off Current code buffer position.
5870 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5871 */
5872DECL_HIDDEN_THROW(uint32_t)
5873iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5874{
5875 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5876 & fFlushGstSimdReg;
5877 if (bmGstSimdRegShadowDirty)
5878 {
5879# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5880 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5881 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5882# endif
5883
5884 do
5885 {
5886 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5887 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5888 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5889 } while (bmGstSimdRegShadowDirty);
5890 }
5891
5892 return off;
5893}
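
/*
 * Illustrative usage sketch (not part of the recompiler itself): flushing
 * either all dirty guest SIMD registers or just a single one before emitting
 * code that must see an up-to-date CPUMCTX:
 *
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);                              // everything
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)));  // only ymm0
 */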
5894
5895
5896#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5897/**
5898 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5899 *
5900 * @returns New code buffer offset.
5901 * @param pReNative The native recompile state.
5902 * @param off Current code buffer position.
5903 * @param idxHstSimdReg The host SIMD register.
5904 *
5905 * @note This doesn't do any unshadowing of guest registers from the host register.
5906 */
5907DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5908{
5909 /* We need to flush any pending guest register writes this host register shadows. */
5910 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5911 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5912 if (bmGstSimdRegShadowDirty)
5913 {
5914# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5915 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5916 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5917# endif
5918
5919 do
5920 {
5921 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5922 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5923 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5924 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5925 } while (bmGstSimdRegShadowDirty);
5926 }
5927
5928 return off;
5929}
5930#endif
5931
5932
5933/**
5934 * Locate a SIMD register, possibly freeing one up.
5935 *
5936 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5937 * failed.
5938 *
5939 * @returns Host register number on success. Returns UINT8_MAX if no registers
5940 * found, the caller is supposed to deal with this and raise an
5941 * allocation-type specific status code (if desired).
5942 *
5943 * @throws VBox status code if we run into trouble spilling a variable or
5944 * recording debug info. Does NOT throw anything if we're out of
5945 * registers, though.
5946 */
5947static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5948 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5949{
5950 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5951 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5952 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5953
5954 /*
5955 * Try a freed register that's shadowing a guest register.
5956 */
5957 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5958 if (fRegs)
5959 {
5960 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5961
5962#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5963 /*
5964 * When we have liveness information, we use it to kick out all shadowed
5965 * guest registers that will not be needed any more in this TB. If we're
5966 * lucky, this may prevent us from ending up here again.
5967 *
5968 * Note! We must consider the previous entry here so we don't free
5969 * anything that the current threaded function requires (current
5970 * entry is produced by the next threaded function).
5971 */
5972 uint32_t const idxCurCall = pReNative->idxCurCall;
5973 if (idxCurCall > 0)
5974 {
5975 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5976
5977# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5978 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5979 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5980 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
5981# else
5982 /* Construct a mask of the registers not in the read or write state.
5983 Note! We could skip writes, if they aren't from us, as this is just
5984 a hack to prevent trashing registers that have just been written
5985 or will be written when we retire the current instruction. */
5986 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5987 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5988 & IEMLIVENESSBIT_MASK;
5989# endif
5990 /* If it matches any shadowed registers. */
5991 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5992 {
5993 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5994 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5995 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5996
5997 /* See if we've got any unshadowed registers we can return now. */
5998 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5999 if (fUnshadowedRegs)
6000 {
6001 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
6002 return (fPreferVolatile
6003 ? ASMBitFirstSetU32(fUnshadowedRegs)
6004 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6005 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
6006 - 1;
6007 }
6008 }
6009 }
6010#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
6011
6012 unsigned const idxReg = (fPreferVolatile
6013 ? ASMBitFirstSetU32(fRegs)
6014 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6015 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
6016 - 1;
6017
6018 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
6019 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
6020 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
6021 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
6022
6023 /* We need to flush any pending guest register writes this host SIMD register shadows. */
6024 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
6025
6026 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
6027 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
6028 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
6029 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6030 return idxReg;
6031 }
6032
6033 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
6034
6035 /*
6036 * Try to free up a variable that's in a register.
6037 *
6038 * We do two rounds here, first evacuating variables that don't need to be
6039 * saved on the stack, then in the second round moving things to the stack.
6040 */
6041 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
6042 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
6043 {
6044 uint32_t fVars = pReNative->Core.bmVars;
6045 while (fVars)
6046 {
6047 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
6048 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
6049 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
6050 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit first or we'd loop forever. */
6051
6052 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
6053 && (RT_BIT_32(idxReg) & fRegMask)
6054 && ( iLoop == 0
6055 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
6056 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6057 && !pReNative->Core.aVars[idxVar].fRegAcquired)
6058 {
6059 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
6060 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
6061 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
6062 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
6063 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
6064 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
6065
6066 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6067 {
6068 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6069 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
6070 }
6071
6072 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6073 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
6074
6075 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
6076 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
6077 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
6078 return idxReg;
6079 }
6080 fVars &= ~RT_BIT_32(idxVar);
6081 }
6082 }
6083
6084 AssertFailed();
6085 return UINT8_MAX;
6086}
6087
6088
6089/**
6090 * Flushes a set of guest register shadow copies.
6091 *
6092 * This is usually done after calling a threaded function or a C-implementation
6093 * of an instruction.
6094 *
6095 * @param pReNative The native recompile state.
6096 * @param fGstSimdRegs Set of guest SIMD registers to flush.
6097 */
6098DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
6099{
6100 /*
6101 * Reduce the mask by what's currently shadowed
6102 */
6103 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
6104 fGstSimdRegs &= bmGstSimdRegShadows;
6105 if (fGstSimdRegs)
6106 {
6107 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
6108 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
6109 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
6110 if (bmGstSimdRegShadowsNew)
6111 {
6112 /*
6113 * Partial.
6114 */
6115 do
6116 {
6117 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
6118 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
6119 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
6120 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
6121 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
6122 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
6123
6124 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
6125 fGstSimdRegs &= ~fInThisHstReg;
6126 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
6127 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
6128 if (!fGstRegShadowsNew)
6129 {
6130 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6131 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6132 }
6133 } while (fGstSimdRegs != 0);
6134 }
6135 else
6136 {
6137 /*
6138 * Clear all.
6139 */
6140 do
6141 {
6142 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
6143 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
6144 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
6145 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
6146 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
6147 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
6148
6149 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
6150 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
6151 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6152 } while (fGstSimdRegs != 0);
6153 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
6154 }
6155 }
6156}
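
/*
 * Illustrative usage sketch (not part of the recompiler itself): dropping the
 * shadow associations of a couple of guest SIMD registers after their values
 * may have been modified behind our back (they must not be marked dirty at
 * this point, see the asserts above):
 *
 *      iemNativeSimdRegFlushGuestShadows(pReNative,
 *                                        RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)) | RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(1)));
 */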
6157
6158
6159/**
6160 * Allocates a temporary host SIMD register.
6161 *
6162 * This may emit code to save register content onto the stack in order to free
6163 * up a register.
6164 *
6165 * @returns The host register number; throws VBox status code on failure,
6166 * so no need to check the return value.
6167 * @param pReNative The native recompile state.
6168 * @param poff Pointer to the variable with the code buffer position.
6169 * This will be updated if we need to move a variable from
6170 * register to stack in order to satisfy the request.
6171 * @param fPreferVolatile Whether to prefer volatile over non-volatile
6172 * registers (@c true, default) or the other way around
6173 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
6174 */
6175DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
6176{
6177 /*
6178 * Try find a completely unused register, preferably a call-volatile one.
6179 */
6180 uint8_t idxSimdReg;
6181 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
6182 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6183 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
6184 if (fRegs)
6185 {
6186 if (fPreferVolatile)
6187 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6188 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6189 else
6190 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6191 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6192 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
6193 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
6194
6195 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6196 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6197 }
6198 else
6199 {
6200 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
6201 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
6202 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6203 }
6204
6205 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
6206 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
6207}
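
/*
 * Illustrative usage sketch (not part of the recompiler itself): grabbing a
 * scratch SIMD register for an intermediate result and releasing it again once
 * the emitted code no longer needs it:
 *
 *      uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit instructions that use idxSimdRegTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
 */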
6208
6209
6210/**
6211 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
6212 * registers.
6213 *
6214 * @returns The host register number; throws VBox status code on failure,
6215 * so no need to check the return value.
6216 * @param pReNative The native recompile state.
6217 * @param poff Pointer to the variable with the code buffer position.
6218 * This will be updated if we need to move a variable from
6219 * register to stack in order to satisfy the request.
6220 * @param fRegMask Mask of acceptable registers.
6221 * @param fPreferVolatile Whether to prefer volatile over non-volatile
6222 * registers (@c true, default) or the other way around
6223 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
6224 */
6225DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
6226 bool fPreferVolatile /*= true*/)
6227{
6228 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
6229 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
6230
6231 /*
6232 * Try find a completely unused register, preferably a call-volatile one.
6233 */
6234 uint8_t idxSimdReg;
6235 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
6236 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6237 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
6238 & fRegMask;
6239 if (fRegs)
6240 {
6241 if (fPreferVolatile)
6242 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6243 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6244 else
6245 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6246 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6247 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
6248 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
6249
6250 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6251 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6252 }
6253 else
6254 {
6255 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
6256 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
6257 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6258 }
6259
6260 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
6261 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
6262}
6263
6264
6265/**
6266 * Sets the indicator for which part of the given SIMD register has valid data loaded.
6267 *
6268 * @param pReNative The native recompile state.
6269 * @param idxHstSimdReg The host SIMD register to update the state for.
6270 * @param enmLoadSz The load size to set.
6271 */
6272DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
6273 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6274{
6275 /* Everything valid already? -> nothing to do. */
6276 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
6277 return;
6278
6279 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
6280 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6281 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
6282 {
6283 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
6284 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6285 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
6286 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
6287 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
6288 }
6289}
6290
6291
6292static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
6293 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
6294{
6295 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
6296 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
6297 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
6298 {
6299# ifdef RT_ARCH_ARM64
6300 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6301 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
6302# endif
6303
6304 if (idxHstSimdRegDst != idxHstSimdRegSrc)
6305 {
6306 switch (enmLoadSzDst)
6307 {
6308 case kIemNativeGstSimdRegLdStSz_256:
6309 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6310 break;
6311 case kIemNativeGstSimdRegLdStSz_Low128:
6312 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6313 break;
6314 case kIemNativeGstSimdRegLdStSz_High128:
6315 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6316 break;
6317 default:
6318 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6319 }
6320
6321 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
6322 }
6323 }
6324 else
6325 {
6326 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
6327 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
6328 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
6329 }
6330
6331 return off;
6332}
6333
6334
6335/**
6336 * Allocates a temporary host SIMD register for keeping a guest
6337 * SIMD register value.
6338 *
6339 * Since we may already have a register holding the guest register value,
6340 * code will be emitted to do the loading if that's not the case. Code may also
6341 * be emitted if we have to free up a register to satisfy the request.
6342 *
6343 * @returns The host register number; throws VBox status code on failure, so no
6344 * need to check the return value.
6345 * @param pReNative The native recompile state.
6346 * @param poff Pointer to the variable with the code buffer
6347 * position. This will be update if we need to move a
6348 * variable from register to stack in order to satisfy
6349 * the request.
6350 * @param enmGstSimdReg The guest SIMD register that is to be updated.
6351 * @param enmIntendedUse How the caller will be using the host register.
6352 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
6353 * register is okay (default). The ASSUMPTION here is
6354 * that the caller has already flushed all volatile
6355 * registers, so this is only applied if we allocate a
6356 * new register.
6357 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
6358 */
6359DECL_HIDDEN_THROW(uint8_t)
6360iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6361 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
6362 bool fNoVolatileRegs /*= false*/)
6363{
6364 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
6365#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
6366 AssertMsg( pReNative->idxCurCall == 0
6367 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6368 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
6369 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
6370 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
6371 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
6372 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
6373#endif
6374#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
6375 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
6376#endif
6377 uint32_t const fRegMask = !fNoVolatileRegs
6378 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
6379 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
6380
6381 /*
6382 * First check if the guest register value is already in a host register.
6383 */
6384 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
6385 {
6386 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
6387 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
6388 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
6389 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
6390
6391 /* It's not supposed to be allocated... */
6392 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
6393 {
6394 /*
6395 * If the register will trash the guest shadow copy, try find a
6396 * completely unused register we can use instead. If that fails,
6397 * we need to disassociate the host reg from the guest reg.
6398 */
6399 /** @todo would be nice to know if preserving the register is in any way helpful. */
6400 /* If the purpose is calculations, try to duplicate the register value as
6401 we'll be clobbering the shadow. */
6402 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
6403 && ( ~pReNative->Core.bmHstSimdRegs
6404 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6405 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
6406 {
6407 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
6408
6409 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6410
6411 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6412 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6413 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6414 idxSimdReg = idxRegNew;
6415 }
6416 /* If the current register matches the restrictions, go ahead and allocate
6417 it for the caller. */
6418 else if (fRegMask & RT_BIT_32(idxSimdReg))
6419 {
6420 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
6421 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
6422 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6423 {
6424 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6425 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
6426 else
6427 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
6428 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
6429 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6430 }
6431 else
6432 {
6433 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
6434 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
6435 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
6436 }
6437 }
6438 /* Otherwise, allocate a register that satisfies the caller and transfer
6439 the shadowing if compatible with the intended use. (This basically
6440 means the caller wants a non-volatile register (RSP push/pop scenario).) */
6441 else
6442 {
6443 Assert(fNoVolatileRegs);
6444 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
6445 !fNoVolatileRegs
6446 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
6447 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6448 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6449 {
6450 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6451 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
6452 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
6453 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6454 }
6455 else
6456 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6457 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6458 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6459 idxSimdReg = idxRegNew;
6460 }
6461 }
6462 else
6463 {
6464 /*
6465 * Oops. Shadowed guest register already allocated!
6466 *
6467 * Allocate a new register, copy the value and, if updating, the
6468 * guest shadow copy assignment to the new register.
6469 */
6470 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6471 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
6472 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
6473 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
6474
6475 /** @todo share register for readonly access. */
6476 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
6477 enmIntendedUse == kIemNativeGstRegUse_Calculation);
6478
6479 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6480 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6481 else
6482 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6483
6484 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6485 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6486 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
6487 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6488 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6489 else
6490 {
6491 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6492 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
6493 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6494 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6495 }
6496 idxSimdReg = idxRegNew;
6497 }
6498 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
6499
6500#ifdef VBOX_STRICT
6501 /* Strict builds: Check that the value is correct. */
6502 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6503 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
6504#endif
6505
6506 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6507 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
6508 {
6509# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6510 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
6511 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
6512# endif
6513
6514 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
6515 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6516 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6517 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6518 else
6519 {
6520 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
6521 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6522 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6523 }
6524 }
6525
6526 return idxSimdReg;
6527 }
6528
6529 /*
6530 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
6531 */
6532 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
6533
6534 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6535 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
6536 else
6537 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6538
6539 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6540 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
6541
6542 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6543 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
6544 {
6545# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6546 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
6547 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
6548# endif
6549
6550 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
6551 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6552 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6553 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6554 else
6555 {
6556 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
6557 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6558 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6559 }
6560 }
6561
6562 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
6563 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6564
6565 return idxRegNew;
6566}
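
/*
 * Illustrative usage sketch (not part of the recompiler itself): acquiring the
 * low 128 bits of guest register xmm1 for updating, emitting the actual
 * operation and then releasing the host register.  The dirty marking done
 * above means the result is written back to CPUMCTX later, when the next flush
 * happens:
 *
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the actual operation on idxSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 */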
6567
6568
6569/**
6570 * Flushes guest SIMD register shadow copies held by a set of host registers.
6571 *
6572 * This is used when calling an external helper, to ensure that we don't carry on
6573 * with any guest shadows in volatile registers, as these will get corrupted by the callee.
6574 *
6575 * @param pReNative The native recompile state.
6576 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
6577 */
6578DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
6579{
6580 /*
6581 * Reduce the mask by what's currently shadowed.
6582 */
6583 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
6584 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
6585 if (fHstSimdRegs)
6586 {
6587 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
6588 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
6589 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
6590 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
6591 if (bmHstSimdRegsWithGstShadowNew)
6592 {
6593 /*
6594 * Partial (likely).
6595 */
6596 uint64_t fGstShadows = 0;
6597 do
6598 {
6599 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
6600 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
6601 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
6602 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
6603 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
6604 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
6605
6606 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
6607 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
6608 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
6609 } while (fHstSimdRegs != 0);
6610 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
6611 }
6612 else
6613 {
6614 /*
6615 * Clear all.
6616 */
6617 do
6618 {
6619 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
6620 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
6621 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
6622 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
6623 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
6624 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
6625
6626 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
6627 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
6628 } while (fHstSimdRegs != 0);
6629 pReNative->Core.bmGstSimdRegShadows = 0;
6630 }
6631 }
6632}
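
/*
 * Illustrative usage sketch (not part of the recompiler itself): dropping all
 * guest SIMD shadows living in call-volatile host SIMD registers right before
 * emitting a helper call, after the corresponding dirty values have been
 * written back:
 *
 *      iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */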
6633#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6634
6635
6636
6637/*********************************************************************************************************************************
6638* Code emitters for flushing pending guest register writes and sanity checks *
6639*********************************************************************************************************************************/
6640
6641#ifdef VBOX_STRICT
6642/**
6643 * Does internal register allocator sanity checks.
6644 */
6645DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6646{
6647 /*
6648 * Iterate host registers building a guest shadowing set.
6649 */
6650 uint64_t bmGstRegShadows = 0;
6651 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6652 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6653 while (bmHstRegsWithGstShadow)
6654 {
6655 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6656 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6657 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6658
6659 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6660 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6661 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6662 bmGstRegShadows |= fThisGstRegShadows;
6663 while (fThisGstRegShadows)
6664 {
6665 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6666 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6667 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6668 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6669 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6670 }
6671 }
6672 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6673 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6674 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6675
6676 /*
6677 * Now the other way around, checking the guest to host index array.
6678 */
6679 bmHstRegsWithGstShadow = 0;
6680 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6681 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6682 while (bmGstRegShadows)
6683 {
6684 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6685 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6686 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6687
6688 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6689 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6690 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6691 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6692 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6693 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6694 }
6695 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6696 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6697 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6698}
6699#endif /* VBOX_STRICT */
6700
6701
6702/**
6703 * Flushes any delayed guest register writes.
6704 *
6705 * This must be called prior to calling CImpl functions and any helpers that use
6706 * the guest state (like raising exceptions) and such.
6707 *
6708 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
6709 * the caller if it wishes to do so.
6710 */
6711DECL_HIDDEN_THROW(uint32_t)
6712iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
6713{
6714#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6715 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6716 off = iemNativeEmitPcWriteback(pReNative, off);
6717#else
6718 RT_NOREF(pReNative, fGstShwExcept);
6719#endif
6720
6721#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6722 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
6723#endif
6724
6725#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6726 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
6727#endif
6728
6729 return off;
6730}
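
/*
 * Illustrative usage sketch (not part of the recompiler itself): flushing every
 * delayed guest register write (no exceptions in either mask) before emitting a
 * call to a C-implementation worker.  The 'Slow' suffix suggests callers
 * normally go through a cheaper wrapper that first checks whether anything is
 * pending at all; that wrapper is not part of this file section:
 *
 *      off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0, 0);
 */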
6731
6732
6733#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6734/**
6735 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6736 */
6737DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6738{
6739 Assert(pReNative->Core.offPc);
6740# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6741 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6742 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6743# endif
6744
6745# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6746 /* Allocate a temporary PC register. */
6747 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6748
6749 /* Perform the addition and store the result. */
6750 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6751 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6752
6753 /* Free but don't flush the PC register. */
6754 iemNativeRegFreeTmp(pReNative, idxPcReg);
6755# else
6756 /* Compare the shadow with the context value, they should match. */
6757 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6758 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6759# endif
6760
6761 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6762 pReNative->Core.offPc = 0;
6763 pReNative->Core.cInstrPcUpdateSkipped = 0;
6764
6765 return off;
6766}
6767#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6768
6769
6770/*********************************************************************************************************************************
6771* Code Emitters (larger snippets) *
6772*********************************************************************************************************************************/
6773
6774/**
6775 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6776 * extending to 64-bit width.
6777 *
6778 * @returns New code buffer offset on success, UINT32_MAX on failure.
6779 * @param pReNative The native recompile state.
6780 * @param off The current code buffer position.
6781 * @param idxHstReg The host register to load the guest register value into.
6782 * @param enmGstReg The guest register to load.
6783 *
6784 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6785 * that is something the caller needs to do if applicable.
6786 */
6787DECL_HIDDEN_THROW(uint32_t)
6788iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6789{
6790 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6791 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6792
6793 switch (g_aGstShadowInfo[enmGstReg].cb)
6794 {
6795 case sizeof(uint64_t):
6796 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6797 case sizeof(uint32_t):
6798 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6799 case sizeof(uint16_t):
6800 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6801#if 0 /* not present in the table. */
6802 case sizeof(uint8_t):
6803 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6804#endif
6805 default:
6806 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6807 }
6808}
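
/*
 * Illustrative usage sketch (not part of the recompiler itself): reloading the
 * guest RIP shadow into the fixed temporary register for a strict-build style
 * check, without registering any new shadow association (which, as noted above,
 * is left to the caller):
 *
 *      off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 */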
6809
6810
6811#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6812/**
6813 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6814 *
6815 * @returns New code buffer offset on success, UINT32_MAX on failure.
6816 * @param pReNative The recompiler state.
6817 * @param off The current code buffer position.
6818 * @param idxHstSimdReg The host register to load the guest register value into.
6819 * @param enmGstSimdReg The guest register to load.
6820 * @param enmLoadSz The load size of the register.
6821 *
6822 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6823 * that is something the caller needs to do if applicable.
6824 */
6825DECL_HIDDEN_THROW(uint32_t)
6826iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6827 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6828{
6829 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6830
6831 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6832 switch (enmLoadSz)
6833 {
6834 case kIemNativeGstSimdRegLdStSz_256:
6835 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6836 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6837 case kIemNativeGstSimdRegLdStSz_Low128:
6838 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6839 case kIemNativeGstSimdRegLdStSz_High128:
6840 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6841 default:
6842 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6843 }
6844}
6845#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6846
6847#ifdef VBOX_STRICT
6848
6849/**
6850 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6851 *
6852 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6853 * Trashes EFLAGS on AMD64.
6854 */
6855DECL_HIDDEN_THROW(uint32_t)
6856iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6857{
6858# ifdef RT_ARCH_AMD64
6859 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6860
6861 /* rol reg64, 32 */
6862 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6863 pbCodeBuf[off++] = 0xc1;
6864 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6865 pbCodeBuf[off++] = 32;
6866
6867 /* test reg32, ffffffffh */
6868 if (idxReg >= 8)
6869 pbCodeBuf[off++] = X86_OP_REX_B;
6870 pbCodeBuf[off++] = 0xf7;
6871 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6872 pbCodeBuf[off++] = 0xff;
6873 pbCodeBuf[off++] = 0xff;
6874 pbCodeBuf[off++] = 0xff;
6875 pbCodeBuf[off++] = 0xff;
6876
6877 /* je/jz +1 */
6878 pbCodeBuf[off++] = 0x74;
6879 pbCodeBuf[off++] = 0x01;
6880
6881 /* int3 */
6882 pbCodeBuf[off++] = 0xcc;
6883
6884 /* rol reg64, 32 */
6885 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6886 pbCodeBuf[off++] = 0xc1;
6887 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6888 pbCodeBuf[off++] = 32;
6889
6890# elif defined(RT_ARCH_ARM64)
6891 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6892 /* lsr tmp0, reg64, #32 */
6893 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6894 /* cbz tmp0, +1 */
6895 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6896 /* brk #0x1100 */
6897 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6898
6899# else
6900# error "Port me!"
6901# endif
6902 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6903 return off;
6904}
6905
6906
6907/**
6908 * Emitting code that checks that the content of register @a idxReg is the same
6909 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6910 * instruction if that's not the case.
6911 *
6912 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6913 * Trashes EFLAGS on AMD64.
6914 */
6915DECL_HIDDEN_THROW(uint32_t)
6916iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6917{
6918#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6919 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6920 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6921 return off;
6922#endif
6923
6924# ifdef RT_ARCH_AMD64
6925 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6926
6927 /* cmp reg, [mem] */
6928 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6929 {
6930 if (idxReg >= 8)
6931 pbCodeBuf[off++] = X86_OP_REX_R;
6932 pbCodeBuf[off++] = 0x38;
6933 }
6934 else
6935 {
6936 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6937 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6938 else
6939 {
6940 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6941 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6942 else
6943 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6944 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6945 if (idxReg >= 8)
6946 pbCodeBuf[off++] = X86_OP_REX_R;
6947 }
6948 pbCodeBuf[off++] = 0x39;
6949 }
6950 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6951
6952 /* je/jz +1 */
6953 pbCodeBuf[off++] = 0x74;
6954 pbCodeBuf[off++] = 0x01;
6955
6956 /* int3 */
6957 pbCodeBuf[off++] = 0xcc;
6958
6959 /* For values smaller than the register size, we must check that the rest
6960 of the register is all zeros. */
6961 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6962 {
6963 /* test reg64, imm32 */
6964 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6965 pbCodeBuf[off++] = 0xf7;
6966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6967 pbCodeBuf[off++] = 0;
6968 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6969 pbCodeBuf[off++] = 0xff;
6970 pbCodeBuf[off++] = 0xff;
6971
6972 /* je/jz +1 */
6973 pbCodeBuf[off++] = 0x74;
6974 pbCodeBuf[off++] = 0x01;
6975
6976 /* int3 */
6977 pbCodeBuf[off++] = 0xcc;
6978 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6979 }
6980 else
6981 {
6982 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6983 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6984 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6985 }
6986
6987# elif defined(RT_ARCH_ARM64)
6988 /* mov TMP0, [gstreg] */
6989 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6990
6991 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6992 /* sub tmp0, tmp0, idxReg */
6993 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6994 /* cbz tmp0, +1 */
6995 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6996 /* brk #0x1000+enmGstReg */
6997 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6998 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6999
7000# else
7001# error "Port me!"
7002# endif
7003 return off;
7004}
7005
7006
7007# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7008# ifdef RT_ARCH_AMD64
7009/**
7010 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
7011 */
7012DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
7013{
7014 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
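        /* pcmpeqq sets each 64-bit lane to all ones on equality, so both quadwords
           extracted below must compare equal to all ones for the values to match. */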
7015 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7016 if (idxSimdReg >= 8)
7017 pbCodeBuf[off++] = X86_OP_REX_R;
7018 pbCodeBuf[off++] = 0x0f;
7019 pbCodeBuf[off++] = 0x38;
7020 pbCodeBuf[off++] = 0x29;
7021 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
7022
7023 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
7024 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7025 pbCodeBuf[off++] = X86_OP_REX_W
7026 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
7027 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7028 pbCodeBuf[off++] = 0x0f;
7029 pbCodeBuf[off++] = 0x3a;
7030 pbCodeBuf[off++] = 0x16;
7031 pbCodeBuf[off++] = 0xeb;
7032 pbCodeBuf[off++] = 0x00;
7033
7034 /* cmp tmp0, 0xffffffffffffffff. */
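        /* 0x83 /7 ib is 'cmp r/m64, imm8'; the 0xff immediate sign-extends to all ones. */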
7035 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7036 pbCodeBuf[off++] = 0x83;
7037 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7038 pbCodeBuf[off++] = 0xff;
7039
7040 /* je/jz +1 */
7041 pbCodeBuf[off++] = 0x74;
7042 pbCodeBuf[off++] = 0x01;
7043
7044 /* int3 */
7045 pbCodeBuf[off++] = 0xcc;
7046
7047 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
7048 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7049 pbCodeBuf[off++] = X86_OP_REX_W
7050 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
7051 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7052 pbCodeBuf[off++] = 0x0f;
7053 pbCodeBuf[off++] = 0x3a;
7054 pbCodeBuf[off++] = 0x16;
7055 pbCodeBuf[off++] = 0xeb;
7056 pbCodeBuf[off++] = 0x01;
7057
7058 /* cmp tmp0, 0xffffffffffffffff. */
7059 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7060 pbCodeBuf[off++] = 0x83;
7061 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7062 pbCodeBuf[off++] = 0xff;
7063
7064 /* je/jz +1 */
7065 pbCodeBuf[off++] = 0x74;
7066 pbCodeBuf[off++] = 0x01;
7067
7068 /* int3 */
7069 pbCodeBuf[off++] = 0xcc;
7070
7071 return off;
7072}
7073# endif
7074
7075
7076/**
7077 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
7078 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
7079 * instruction if that's not the case.
7080 *
7081 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
7082 * Trashes EFLAGS on AMD64.
7083 */
7084DECL_HIDDEN_THROW(uint32_t)
7085iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
7086 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
7087{
7088    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
7089 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
7090 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
7091 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
7092 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
7093 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
7094 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
7095 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
7096 return off;
7097
7098# ifdef RT_ARCH_AMD64
7099 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7100 {
7101 /* movdqa vectmp0, idxSimdReg */
7102 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
7103
7104 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
7105
7106 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7107 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
7108 }
7109
7110 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7111 {
7112        /* Due to the fact that CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
7113 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
7114
7115 /* vextracti128 vectmp0, idxSimdReg, 1 */
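            /* Note: the VEX.R/X/B bits are stored inverted, hence the '< 8' tests
               below; the trailing imm8 of 1 selects the upper 128-bit lane. */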
7116 pbCodeBuf[off++] = X86_OP_VEX3;
7117 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
7118 | X86_OP_VEX3_BYTE1_X
7119 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
7120 | 0x03; /* Opcode map */
7121 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
7122 pbCodeBuf[off++] = 0x39;
7123 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
7124 pbCodeBuf[off++] = 0x01;
7125
7126 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7127 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
7128 }
7129# elif defined(RT_ARCH_ARM64)
7130 /* mov vectmp0, [gstreg] */
7131 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
7132
7133 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7134 {
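            /* The idea: XOR the two 128-bit values, sum all 16 result bytes with
               uaddlv, and hit the brk if the sum extracted via umov is non-zero,
               i.e. if any bit differed. */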
7135 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7136 /* eor vectmp0, vectmp0, idxSimdReg */
7137 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
7138 /* uaddlv vectmp0, vectmp0.16B */
7139 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
7140 /* umov tmp0, vectmp0.H[0] */
7141 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7142 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
7143 /* cbz tmp0, +1 */
7144 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7145 /* brk #0x1000+enmGstReg */
7146 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
7147 }
7148
7149 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7150 {
7151 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7152 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
7153 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
7154 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
7155 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
7156 /* umov tmp0, (vectmp0 + 1).H[0] */
7157 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
7158 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
7159 /* cbz tmp0, +1 */
7160 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7161 /* brk #0x1000+enmGstReg */
7162 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
7163 }
7164
7165# else
7166# error "Port me!"
7167# endif
7168
7169 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7170 return off;
7171}
7172# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
7173
7174
7175/**
7176 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
7177 * important bits.
7178 *
7179 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
7180 * Trashes EFLAGS on AMD64.
7181 */
7182DECL_HIDDEN_THROW(uint32_t)
7183iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
7184{
7185 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
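        /* Note: only the bits covered by IEMTB_F_KEY_MASK (and IEMTB_F_IEM_F_MASK
           for the live IEMCPU::fExec value) take part in the comparison below. */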
7186 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
7187 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
7188 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
7189
7190#ifdef RT_ARCH_AMD64
7191 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7192
7193 /* je/jz +1 */
7194 pbCodeBuf[off++] = 0x74;
7195 pbCodeBuf[off++] = 0x01;
7196
7197 /* int3 */
7198 pbCodeBuf[off++] = 0xcc;
7199
7200# elif defined(RT_ARCH_ARM64)
7201 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7202
7203 /* b.eq +1 */
7204 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
7205 /* brk #0x2000 */
7206 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
7207
7208# else
7209# error "Port me!"
7210# endif
7211 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7212
7213 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7214 return off;
7215}
7216
7217#endif /* VBOX_STRICT */
7218
7219
7220#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
7221/**
7222 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
7223 */
7224DECL_HIDDEN_THROW(uint32_t)
7225iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
7226{
7227 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
7228
7229 fEflNeeded &= X86_EFL_STATUS_BITS;
7230 if (fEflNeeded)
7231 {
7232# ifdef RT_ARCH_AMD64
7233 /* test dword [pVCpu + offVCpu], imm32 */
7234 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7235 if (fEflNeeded <= 0xff)
7236 {
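                /* All needed flags fit in the low byte, so the shorter
                   'test byte [mem], imm8' form (0xf6 /0 ib) suffices. */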
7237 pCodeBuf[off++] = 0xf6;
7238 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
7239 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
7240 }
7241 else
7242 {
7243 pCodeBuf[off++] = 0xf7;
7244 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
7245 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
7246 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
7247 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
7248 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
7249 }
7250 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7251
7252# else
7253 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
7254 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
7255 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
7256# ifdef RT_ARCH_ARM64
7257 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
7258 off = iemNativeEmitBrk(pReNative, off, 0x7777);
7259# else
7260# error "Port me!"
7261# endif
7262 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7263# endif
7264 }
7265 return off;
7266}
7267#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
7268
7269
7270/**
7271 * Emits code for checking the return code of a call and rcPassUp, returning
7272 * from the code if either is non-zero.
7273 */
7274DECL_HIDDEN_THROW(uint32_t)
7275iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7276{
7277#ifdef RT_ARCH_AMD64
7278 /*
7279 * AMD64: eax = call status code.
7280 */
7281
7282 /* edx = rcPassUp */
7283 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
7284# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7285 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
7286# endif
7287
7288 /* edx = eax | rcPassUp */
7289 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7290 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
7291 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
7292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7293
7294 /* Jump to non-zero status return path. */
7295 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
7296
7297 /* done. */
7298
7299#elif RT_ARCH_ARM64
7300 /*
7301 * ARM64: w0 = call status code.
7302 */
7303# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7304 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
7305# endif
7306 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
7307
7308 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7309
7310 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
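        /* w4 = w3 | w0, i.e. rcPassUp OR'ed with the call status, so the single
           cbnz below catches either of them being non-zero. */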
7311
7312 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7313 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7314 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
7315
7316#else
7317# error "port me"
7318#endif
7319 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7320 RT_NOREF_PV(idxInstr);
7321 return off;
7322}
7323
7324
7325/**
7326 * Emits code to check if the content of @a idxAddrReg is a canonical address,
7327 * raising a \#GP(0) if it isn't.
7328 *
7329 * @returns New code buffer offset, UINT32_MAX on failure.
7330 * @param pReNative The native recompile state.
7331 * @param off The code buffer offset.
7332 * @param idxAddrReg The host register with the address to check.
7333 * @param idxInstr The current instruction.
7334 */
7335DECL_HIDDEN_THROW(uint32_t)
7336iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
7337{
7338 /*
7339 * Make sure we don't have any outstanding guest register writes as we may
7340     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
7341 */
7342 off = iemNativeRegFlushPendingWrites(pReNative, off);
7343
7344#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7345 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7346#else
7347 RT_NOREF(idxInstr);
7348#endif
7349
7350#ifdef RT_ARCH_AMD64
7351 /*
7352 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
7353 * return raisexcpt();
7354     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
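         * Example: canonical addresses have bits 63:32 in the ranges
         *          0x00000000..0x00007fff or 0xffff8000..0xffffffff; adding 0x8000
         *          wraps both ranges into 0x0000..0xffff, so bits 31:16 of the sum
         *          are zero exactly for canonical addresses.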
7355 */
7356 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7357
7358 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
7359 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
7360 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
7361 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
7362 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7363
7364 iemNativeRegFreeTmp(pReNative, iTmpReg);
7365
7366#elif defined(RT_ARCH_ARM64)
7367 /*
7368 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
7369 * return raisexcpt();
7370 * ----
7371 * mov x1, 0x800000000000
7372 * add x1, x0, x1
7373 * cmp xzr, x1, lsr 48
7374 * b.ne .Lraisexcpt
7375 */
7376 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7377
7378 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
7379 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
7380 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
7381 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7382
7383 iemNativeRegFreeTmp(pReNative, iTmpReg);
7384
7385#else
7386# error "Port me"
7387#endif
7388 return off;
7389}
7390
7391
7392/**
7393 * Emits code to check that the content of @a idxAddrReg is within the limit
7394 * of CS, raising a \#GP(0) if it isn't.
7395 *
7396 * @returns New code buffer offset; throws VBox status code on error.
7397 * @param pReNative The native recompile state.
7398 * @param off The code buffer offset.
7399 * @param idxAddrReg The host register (32-bit) with the address to
7400 * check.
7401 * @param idxInstr The current instruction.
7402 */
7403DECL_HIDDEN_THROW(uint32_t)
7404iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7405 uint8_t idxAddrReg, uint8_t idxInstr)
7406{
7407 /*
7408 * Make sure we don't have any outstanding guest register writes as we may
7409     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
7410 */
7411 off = iemNativeRegFlushPendingWrites(pReNative, off);
7412
7413#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7414 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7415#else
7416 RT_NOREF(idxInstr);
7417#endif
7418
7419 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
7420 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
7421 kIemNativeGstRegUse_ReadOnly);
7422
7423 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
7424 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7425
7426 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
7427 return off;
7428}
7429
7430
7431/**
7432 * Emits a call to a CImpl function or something similar.
7433 */
7434DECL_HIDDEN_THROW(uint32_t)
7435iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
7436 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
7437{
7438 /* Writeback everything. */
7439 off = iemNativeRegFlushPendingWrites(pReNative, off);
7440
7441 /*
7442     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
7443 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
7444 */
7445 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
7446 fGstShwFlush
7447 | RT_BIT_64(kIemNativeGstReg_Pc)
7448 | RT_BIT_64(kIemNativeGstReg_EFlags));
7449 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7450
7451 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7452
7453 /*
7454 * Load the parameters.
7455 */
7456#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
7457    /* Special handling for the hidden VBOXSTRICTRC pointer. */
7458 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7459 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7460 if (cAddParams > 0)
7461 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
7462 if (cAddParams > 1)
7463 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
7464 if (cAddParams > 2)
7465 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
7466 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7467
7468#else
7469 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7470 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7471 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7472 if (cAddParams > 0)
7473 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
7474 if (cAddParams > 1)
7475 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
7476 if (cAddParams > 2)
7477# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
7478 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
7479# else
7480 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
7481# endif
7482#endif
7483
7484 /*
7485 * Make the call.
7486 */
7487 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
7488
7489#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7490 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7491#endif
7492
7493 /*
7494 * Check the status code.
7495 */
7496 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7497}
7498
7499
7500/**
7501 * Emits a call to a threaded worker function.
7502 */
7503DECL_HIDDEN_THROW(uint32_t)
7504iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7505{
7506 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7507
7508 /* We don't know what the threaded function is doing so we must flush all pending writes. */
7509 off = iemNativeRegFlushPendingWrites(pReNative, off);
7510
7511 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
7512 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7513
7514#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7515    /* The threaded function may throw / long jmp, so set the current instruction
7516 number if we're counting. */
7517 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7518#endif
7519
7520 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
7521
7522#ifdef RT_ARCH_AMD64
7523 /* Load the parameters and emit the call. */
7524# ifdef RT_OS_WINDOWS
7525# ifndef VBOXSTRICTRC_STRICT_ENABLED
7526 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7527 if (cParams > 0)
7528 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
7529 if (cParams > 1)
7530 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
7531 if (cParams > 2)
7532 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
7533# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
7534 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
7535 if (cParams > 0)
7536 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
7537 if (cParams > 1)
7538 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
7539 if (cParams > 2)
7540 {
7541 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
7542 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
7543 }
7544 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7545# endif /* VBOXSTRICTRC_STRICT_ENABLED */
7546# else
7547 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7548 if (cParams > 0)
7549 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
7550 if (cParams > 1)
7551 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
7552 if (cParams > 2)
7553 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
7554# endif
7555
7556 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7557
7558# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7559 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7560# endif
7561
7562#elif RT_ARCH_ARM64
7563 /*
7564 * ARM64:
7565 */
7566 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7567 if (cParams > 0)
7568 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
7569 if (cParams > 1)
7570 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
7571 if (cParams > 2)
7572 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
7573
7574 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7575
7576#else
7577# error "port me"
7578#endif
7579
7580 /*
7581 * Check the status code.
7582 */
7583 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
7584
7585 return off;
7586}
7587
7588#ifdef VBOX_WITH_STATISTICS
7589/**
7590 * Emits code to update the threaded call statistics.
7591 */
7592DECL_INLINE_THROW(uint32_t)
7593iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7594{
7595 /*
7596 * Update threaded function stats.
7597 */
7598 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
7599 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
7600# if defined(RT_ARCH_ARM64)
7601 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
7602 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
7603 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
7604 iemNativeRegFreeTmp(pReNative, idxTmp1);
7605 iemNativeRegFreeTmp(pReNative, idxTmp2);
7606# else
7607 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
7608# endif
7609 return off;
7610}
7611#endif /* VBOX_WITH_STATISTICS */
7612
7613
7614/**
7615 * Emits the code at the ReturnWithFlags label (returns
7616 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7617 */
7618static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7619{
7620 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7621 if (idxLabel != UINT32_MAX)
7622 {
7623 iemNativeLabelDefine(pReNative, idxLabel, off);
7624
7625 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7626
7627 /* jump back to the return sequence. */
7628 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7629 }
7630 return off;
7631}
7632
7633
7634/**
7635 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7636 */
7637static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7638{
7639 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7640 if (idxLabel != UINT32_MAX)
7641 {
7642 iemNativeLabelDefine(pReNative, idxLabel, off);
7643
7644 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7645
7646 /* jump back to the return sequence. */
7647 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7648 }
7649 return off;
7650}
7651
7652
7653/**
7654 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7655 */
7656static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7657{
7658 /*
7659 * Generate the rc + rcPassUp fiddling code if needed.
7660 */
7661 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7662 if (idxLabel != UINT32_MAX)
7663 {
7664 iemNativeLabelDefine(pReNative, idxLabel, off);
7665
7666 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
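            /* On entry the call status is in eax/w0 and, when instruction counting
               is enabled, the instruction number is in cl (amd64) / x2 (arm64) as
               set up by iemNativeEmitCheckCallRetAndPassUp. */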
7667#ifdef RT_ARCH_AMD64
7668# ifdef RT_OS_WINDOWS
7669# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7670 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7671# endif
7672 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7673 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7674# else
7675 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7676 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7677# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7678 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7679# endif
7680# endif
7681# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7682 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7683# endif
7684
7685#else
7686 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7687 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7688 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7689#endif
7690
7691 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7692 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7693 }
7694 return off;
7695}
7696
7697
7698/**
7699 * Emits a standard epilog.
7700 */
7701static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7702{
7703 *pidxReturnLabel = UINT32_MAX;
7704
7705 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7706 off = iemNativeRegFlushPendingWrites(pReNative, off);
7707
7708 /*
7709 * Successful return, so clear the return register (eax, w0).
7710 */
7711 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
7712
7713 /*
7714 * Define label for common return point.
7715 */
7716 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7717 *pidxReturnLabel = idxReturn;
7718
7719 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7720
7721 /*
7722 * Restore registers and return.
7723 */
7724#ifdef RT_ARCH_AMD64
7725 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7726
7727 /* Reposition esp at the r15 restore point. */
7728 pbCodeBuf[off++] = X86_OP_REX_W;
7729 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7730 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7731 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7732
7733 /* Pop non-volatile registers and return */
7734 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7735 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7736 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7737 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7738 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7739 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7740 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7741 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7742# ifdef RT_OS_WINDOWS
7743 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7744 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7745# endif
7746 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7747 pbCodeBuf[off++] = 0xc9; /* leave */
7748 pbCodeBuf[off++] = 0xc3; /* ret */
7749 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7750
7751#elif RT_ARCH_ARM64
7752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7753
7754 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7755 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7756 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7757 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7758 IEMNATIVE_FRAME_VAR_SIZE / 8);
7759 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7760 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7761 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7762 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7763 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7764 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7765 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7766 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7767 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7768 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7769 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7770 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7771
7772 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7773 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7774 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7775 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7776
7777 /* retab / ret */
7778# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7779 if (1)
7780 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7781 else
7782# endif
7783 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7784
7785#else
7786# error "port me"
7787#endif
7788 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7789
7790 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7791}
7792
7793
7794/**
7795 * Emits a standard prolog.
7796 */
7797static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7798{
7799#ifdef RT_ARCH_AMD64
7800 /*
7801 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7802 * reserving 64 bytes for stack variables plus 4 non-register argument
7803     * slots. Fixed register assignment: xBX = pVCpu;
7804 *
7805 * Since we always do the same register spilling, we can use the same
7806 * unwind description for all the code.
7807 */
7808 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7809 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7810 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7811 pbCodeBuf[off++] = 0x8b;
7812 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7813 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7814 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7815# ifdef RT_OS_WINDOWS
7816 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7817 pbCodeBuf[off++] = 0x8b;
7818 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7819 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7820 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7821# else
7822 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7823 pbCodeBuf[off++] = 0x8b;
7824 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7825# endif
7826 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7827 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7828 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7829 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7830 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7831 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7832 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7833 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7834
7835# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7836 /* Save the frame pointer. */
7837 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7838# endif
7839
7840 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7841 X86_GREG_xSP,
7842 IEMNATIVE_FRAME_ALIGN_SIZE
7843 + IEMNATIVE_FRAME_VAR_SIZE
7844 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7845 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7846 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7847 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7848 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7849
7850#elif RT_ARCH_ARM64
7851 /*
7852 * We set up a stack frame exactly like on x86, only we have to push the
7853     * return address ourselves here. We save all non-volatile registers.
7854 */
7855 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7856
7857# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
7858                      *        unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
7859                      *        It's definitely the dwarf stepping code, but until that's found it's very tedious to figure out
7860                      *        whether it's in any way conditional, so just emitting this instruction now and hoping for the best... */
7861 /* pacibsp */
7862 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7863# endif
7864
7865 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7866 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7867 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7868 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7869 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7870 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7871 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7872 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7873 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7874 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7875 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7876 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7877 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7878 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7879 /* Save the BP and LR (ret address) registers at the top of the frame. */
7880 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7881 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7882 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7883 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7884 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7885 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7886
7887 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7888 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7889
7890 /* mov r28, r0 */
7891 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7892 /* mov r27, r1 */
7893 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7894
7895# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7896 /* Save the frame pointer. */
7897 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7898 ARMV8_A64_REG_X2);
7899# endif
7900
7901#else
7902# error "port me"
7903#endif
7904 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7905 return off;
7906}
7907
7908
7909/*********************************************************************************************************************************
7910* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7911*********************************************************************************************************************************/
7912
7913/**
7914 * Internal work that allocates a variable with kind set to
7915 * kIemNativeVarKind_Invalid and no current stack allocation.
7916 *
7917 * The kind will either be set by the caller or later when the variable is first
7918 * assigned a value.
7919 *
7920 * @returns Unpacked index.
7921 * @internal
7922 */
7923static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7924{
7925 Assert(cbType > 0 && cbType <= 64);
7926 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7927 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7928 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7929 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7930 pReNative->Core.aVars[idxVar].cbVar = cbType;
7931 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7932 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7933 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7934 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7935 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7936 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7937 pReNative->Core.aVars[idxVar].u.uValue = 0;
7938#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7939 pReNative->Core.aVars[idxVar].fSimdReg = false;
7940#endif
7941 return idxVar;
7942}
7943
7944
7945/**
7946 * Internal work that allocates an argument variable w/o setting enmKind.
7947 *
7948 * @returns Unpacked index.
7949 * @internal
7950 */
7951static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7952{
7953 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7954 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7955 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7956
7957 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7958 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7959 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7960 return idxVar;
7961}
7962
7963
7964/**
7965 * Gets the stack slot for a stack variable, allocating one if necessary.
7966 *
7967 * Calling this function implies that the stack slot will contain a valid
7968 * variable value. The caller deals with any register currently assigned to the
7969 * variable, typically by spilling it into the stack slot.
7970 *
7971 * @returns The stack slot number.
7972 * @param pReNative The recompiler state.
7973 * @param idxVar The variable.
7974 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7975 */
7976DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7977{
7978 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7979 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7980 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7981
7982 /* Already got a slot? */
7983 uint8_t const idxStackSlot = pVar->idxStackSlot;
7984 if (idxStackSlot != UINT8_MAX)
7985 {
7986 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7987 return idxStackSlot;
7988 }
7989
7990 /*
7991 * A single slot is easy to allocate.
7992 * Allocate them from the top end, closest to BP, to reduce the displacement.
7993 */
7994 if (pVar->cbVar <= sizeof(uint64_t))
7995 {
7996 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7997 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7998 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7999 pVar->idxStackSlot = (uint8_t)iSlot;
8000 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
8001 return (uint8_t)iSlot;
8002 }
8003
8004 /*
8005 * We need more than one stack slot.
8006 *
8007 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
8008 */
8009 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
8010 Assert(pVar->cbVar <= 64);
8011 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
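        /* E.g. cbVar=32: ASMBitLastSetU32(32) = 6, so fBitAlignMask = RT_BIT_32(2) - 1 = 3,
           i.e. 4-slot (32-byte) alignment. */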
8012 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
8013 uint32_t bmStack = pReNative->Core.bmStack;
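        /* Scan downwards from the highest free slot: align the candidate down to the
           required boundary, take it if all slots covered by the allocation mask are
           free, otherwise mark them as taken in the local copy so the search moves on. */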
8014 while (bmStack != UINT32_MAX)
8015 {
8016 unsigned iSlot = ASMBitLastSetU32(~bmStack);
8017 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8018 iSlot = (iSlot - 1) & ~fBitAlignMask;
8019 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
8020 {
8021 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
8022 pVar->idxStackSlot = (uint8_t)iSlot;
8023 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8024 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
8025 return (uint8_t)iSlot;
8026 }
8027
8028 bmStack |= (fBitAllocMask << iSlot);
8029 }
8030 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8031}
8032
8033
8034/**
8035 * Changes the variable to a stack variable.
8036 *
8037 * Currently this is only possible to do the first time the variable is used;
8038 * switching later can be implemented but is not done.
8039 *
8040 * @param pReNative The recompiler state.
8041 * @param idxVar The variable.
8042 * @throws VERR_IEM_VAR_IPE_2
8043 */
8044DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8045{
8046 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8047 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8048 if (pVar->enmKind != kIemNativeVarKind_Stack)
8049 {
8050 /* We could in theory transition from immediate to stack as well, but it
8051 would involve the caller doing work storing the value on the stack. So,
8052 till that's required we only allow transition from invalid. */
8053 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8054 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8055 pVar->enmKind = kIemNativeVarKind_Stack;
8056
8057 /* Note! We don't allocate a stack slot here, that's only done when a
8058 slot is actually needed to hold a variable value. */
8059 }
8060}
8061
8062
8063/**
8064 * Sets the variable to a constant (immediate) value.
8065 *
8066 * This does not require stack storage as we know the value and can always
8067 * reload it, unless of course it's referenced.
8068 *
8069 * @param pReNative The recompiler state.
8070 * @param idxVar The variable.
8071 * @param uValue The immediate value.
8072 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8073 */
8074DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
8075{
8076 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8077 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8078 if (pVar->enmKind != kIemNativeVarKind_Immediate)
8079 {
8080 /* Only simple transitions for now. */
8081 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8082 pVar->enmKind = kIemNativeVarKind_Immediate;
8083 }
8084 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8085
8086 pVar->u.uValue = uValue;
8087 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
8088 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
8089 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
8090}
8091
8092
8093/**
8094 * Sets the variable to a reference (pointer) to @a idxOtherVar.
8095 *
8096 * This does not require stack storage as we know the value and can always
8097 * reload it. Loading is postponed till needed.
8098 *
8099 * @param pReNative The recompiler state.
8100 * @param idxVar The variable. Unpacked.
8101 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
8102 *
8103 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8104 * @internal
8105 */
8106static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
8107{
8108 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
8109 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
8110
8111 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
8112 {
8113 /* Only simple transitions for now. */
8114 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
8115 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8116 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
8117 }
8118 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8119
8120 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
8121
8122 /* Update the other variable, ensure it's a stack variable. */
8123 /** @todo handle variables with const values... that'll go boom now. */
8124 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
8125 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8126}
8127
8128
8129/**
8130 * Sets the variable to a reference (pointer) to a guest register reference.
8131 *
8132 * This does not require stack storage as we know the value and can always
8133 * reload it. Loading is postponed till needed.
8134 *
8135 * @param pReNative The recompiler state.
8136 * @param idxVar The variable.
8137 * @param   enmRegClass The class of guest registers to reference.
8138 * @param idxReg The register within @a enmRegClass to reference.
8139 *
8140 * @throws VERR_IEM_VAR_IPE_2
8141 */
8142DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8143 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
8144{
8145 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8146 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8147
8148 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
8149 {
8150 /* Only simple transitions for now. */
8151 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8152 pVar->enmKind = kIemNativeVarKind_GstRegRef;
8153 }
8154 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8155
8156 pVar->u.GstRegRef.enmClass = enmRegClass;
8157 pVar->u.GstRegRef.idx = idxReg;
8158}
8159
8160
8161DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8162{
8163 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8164}
8165
8166
8167DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
8168{
8169 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8170
8171 /* Since we're using a generic uint64_t value type, we must truncate it if
8172       the variable is smaller, otherwise we may end up with too large a value when
8173       scaling up an imm8 w/ sign-extension.
8174
8175 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
8176       in the bios, bx=1) when running on arm, because clang expects 16-bit
8177 register parameters to have bits 16 and up set to zero. Instead of
8178 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
8179 CF value in the result. */
8180 switch (cbType)
8181 {
8182 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8183 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8184 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8185 }
8186 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8187 return idxVar;
8188}
8189
8190
8191DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
8192{
8193 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
8194 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
8195 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
8196 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
8197 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
8198 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8199
8200 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
8201 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
8202 return idxArgVar;
8203}
8204
8205
8206DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8207{
8208 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8209 /* Don't set to stack now, leave that to the first use as for instance
8210 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
8211 return idxVar;
8212}
8213
8214
8215DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
8216{
8217 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8218
8219 /* Since we're using a generic uint64_t value type, we must truncate it if
8220       the variable is smaller, otherwise we may end up with too large a value when
8221       scaling up an imm8 w/ sign-extension. */
8222 switch (cbType)
8223 {
8224 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8225 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8226 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8227 }
8228 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8229 return idxVar;
8230}
8231
8232
8233DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
8234{
8235 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8236 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8237
8238 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
8239 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
8240
8241 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
8242
8243    /* Truncate the value to this variable's size. */
8244 switch (cbType)
8245 {
8246 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
8247 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
8248 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
8249 }
8250
8251 iemNativeVarRegisterRelease(pReNative, idxVarOther);
8252 iemNativeVarRegisterRelease(pReNative, idxVar);
8253 return idxVar;
8254}
8255
8256
8257/**
8258 * Makes sure variable @a idxVar has a register assigned to it and that it stays
8259 * fixed till we call iemNativeVarRegisterRelease.
8260 *
8261 * @returns The host register number.
8262 * @param pReNative The recompiler state.
8263 * @param idxVar The variable.
8264 * @param poff Pointer to the instruction buffer offset.
8265 * In case a register needs to be freed up or the value
8266 *                      Updated in case a register needs to be freed up or the
8267 *                      value needs to be loaded off the stack.
8268 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8269 * the case.
8270 * @param idxRegPref Preferred register number or UINT8_MAX.
8271 */
8272DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8273 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8274{
8275 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8276 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8277 Assert(pVar->cbVar <= 8);
8278 Assert(!pVar->fRegAcquired);
8279
8280 uint8_t idxReg = pVar->idxReg;
8281 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8282 {
8283 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8284 && pVar->enmKind < kIemNativeVarKind_End);
8285 pVar->fRegAcquired = true;
8286 return idxReg;
8287 }
8288
8289 /*
8290 * If the kind of variable has not yet been set, default to 'stack'.
8291 */
8292 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8293 && pVar->enmKind < kIemNativeVarKind_End);
8294 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8295 iemNativeVarSetKindToStack(pReNative, idxVar);
8296
8297 /*
8298     * We have to allocate a register for the variable, even if it's a stack one,
8299     * as we don't know if there are modifications being made to it before it's
8300     * finalized (todo: analyze and insert hints about that?).
8301     *
8302     * If we can, we try to get the correct register for argument variables. This
8303     * assumes that most argument variables are fetched as close as possible to
8304     * the actual call, so that there aren't any interfering hidden calls
8305     * (memory accesses, etc.) in between.
8306     *
8307     * If we cannot, or it's not an argument variable, we make sure no argument
8308     * registers that will be used by this MC block are allocated here, and we
8309     * always prefer non-volatile registers to avoid needing to spill stuff for
8310     * internal calls.
8311 */
8312 /** @todo Detect too early argument value fetches and warn about hidden
8313 * calls causing less optimal code to be generated in the python script. */
8314
8315 uint8_t const uArgNo = pVar->uArgNo;
8316 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8317 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8318 {
8319 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8320
8321#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8322 /* Writeback any dirty shadow registers we are about to unshadow. */
8323 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
8324#endif
8325
8326 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8327 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8328 }
8329 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8330 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8331 {
8332 /** @todo there must be a better way for this, and for cArgsX to boot? */
8333 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8334 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8335 & ~pReNative->Core.bmHstRegsWithGstShadow
8336 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8337 & fNotArgsMask;
8338 if (fRegs)
8339 {
8340 /* Pick from the top, as both arm64 and amd64 have a block of non-volatile registers there. */
8341 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8342 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8343 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8344 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8345 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8346 }
8347 else
8348 {
8349 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8350 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8351 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8352 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8353 }
8354 }
8355 else
8356 {
8357 idxReg = idxRegPref;
8358 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8359 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8360 }
8361 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8362 pVar->idxReg = idxReg;
8363
8364#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8365 pVar->fSimdReg = false;
8366#endif
8367
8368 /*
8369 * Load it off the stack if we've got a stack slot.
8370 */
8371 uint8_t const idxStackSlot = pVar->idxStackSlot;
8372 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8373 {
8374 Assert(fInitialized);
8375 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8376 switch (pVar->cbVar)
8377 {
8378 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8379 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8380 case 3: AssertFailed(); RT_FALL_THRU();
8381 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8382 default: AssertFailed(); RT_FALL_THRU();
8383 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8384 }
8385 }
8386 else
8387 {
8388 Assert(idxStackSlot == UINT8_MAX);
8389 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8390 }
8391 pVar->fRegAcquired = true;
8392 return idxReg;
8393}
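/*
 * Editorial sketch of the acquire/use/release pattern, mirroring the calls in
 * iemNativeVarAllocAssign above (idxVarSrc/idxVarDst are hypothetical):
 *      uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true); // fInitialized
 *      uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegDst, idxRegSrc);
 *      iemNativeVarRegisterRelease(pReNative, idxVarDst);
 *      iemNativeVarRegisterRelease(pReNative, idxVarSrc);
 */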
8394
8395
8396#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8397/**
8398 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
8399 * fixed till we call iemNativeVarRegisterRelease.
8400 *
8401 * @returns The host register number.
8402 * @param pReNative The recompiler state.
8403 * @param idxVar The variable.
8404 * @param poff Pointer to the instruction buffer offset.
8405 * In case a register needs to be freed up or the value
8406 * loaded off the stack.
8407 * @param fInitialized Set if the variable must already have been initialized.
8408 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8409 * the case.
8410 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
8411 */
8412DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8413 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8414{
8415 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8416 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8417 Assert( pVar->cbVar == sizeof(RTUINT128U)
8418 || pVar->cbVar == sizeof(RTUINT256U));
8419 Assert(!pVar->fRegAcquired);
8420
8421 uint8_t idxReg = pVar->idxReg;
8422 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
8423 {
8424 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8425 && pVar->enmKind < kIemNativeVarKind_End);
8426 pVar->fRegAcquired = true;
8427 return idxReg;
8428 }
8429
8430 /*
8431 * If the kind of variable has not yet been set, default to 'stack'.
8432 */
8433 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8434 && pVar->enmKind < kIemNativeVarKind_End);
8435 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8436 iemNativeVarSetKindToStack(pReNative, idxVar);
8437
8438 /*
8439 * We have to allocate a register for the variable, even if its a stack one
8440 * as we don't know if there are modification being made to it before its
8441 * finalized (todo: analyze and insert hints about that?).
8442 *
8443 * If we can, we try get the correct register for argument variables. This
8444 * is assuming that most argument variables are fetched as close as possible
8445 * to the actual call, so that there aren't any interfering hidden calls
8446 * (memory accesses, etc) inbetween.
8447 *
8448 * If we cannot or it's a variable, we make sure no argument registers
8449 * that will be used by this MC block will be allocated here, and we always
8450 * prefer non-volatile registers to avoid needing to spill stuff for internal
8451 * call.
8452 */
8453 /** @todo Detect too early argument value fetches and warn about hidden
8454 * calls causing less optimal code to be generated in the python script. */
8455
8456 uint8_t const uArgNo = pVar->uArgNo;
8457 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
8458
8459 /* SIMD is a bit simpler for now because there is no support for arguments. */
8460 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
8461 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
8462 {
8463 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8464 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
8465 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
8466 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
8467 & fNotArgsMask;
8468 if (fRegs)
8469 {
8470 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
8471 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
8472 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
8473 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
8474 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8475 }
8476 else
8477 {
8478 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8479 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
8480 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8481 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8482 }
8483 }
8484 else
8485 {
8486 idxReg = idxRegPref;
8487 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8488 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8489 }
8490 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8491
8492 pVar->fSimdReg = true;
8493 pVar->idxReg = idxReg;
8494
8495 /*
8496 * Load it off the stack if we've got a stack slot.
8497 */
8498 uint8_t const idxStackSlot = pVar->idxStackSlot;
8499 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8500 {
8501 Assert(fInitialized);
8502 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8503 switch (pVar->cbVar)
8504 {
8505 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
8506 default: AssertFailed(); RT_FALL_THRU();
8507 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
8508 }
8509 }
8510 else
8511 {
8512 Assert(idxStackSlot == UINT8_MAX);
8513 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8514 }
8515 pVar->fRegAcquired = true;
8516 return idxReg;
8517}
8518#endif
8519
8520
8521/**
8522 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8523 * guest register.
8524 *
8525 * This function makes sure there is a register for it and sets it to be the
8526 * current shadow copy of @a enmGstReg.
8527 *
8528 * @returns The host register number.
8529 * @param pReNative The recompiler state.
8530 * @param idxVar The variable.
8531 * @param enmGstReg The guest register this variable will be written to
8532 * after this call.
8533 * @param poff Pointer to the instruction buffer offset.
8534 * In case a register needs to be freed up or if the
8535 * variable content needs to be loaded off the stack.
8536 *
8537 * @note We DO NOT expect @a idxVar to be an argument variable,
8538 * because this function is only used in the commit stage of an
8539 * instruction.
8540 */
8541DECL_HIDDEN_THROW(uint8_t)
8542iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8543{
8544 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8545 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8546 Assert(!pVar->fRegAcquired);
8547 AssertMsgStmt( pVar->cbVar <= 8
8548 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8549 || pVar->enmKind == kIemNativeVarKind_Stack),
8550 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8551 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8552 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8553
8554 /*
8555 * This shouldn't ever be used for arguments, unless it's in a weird else
8556 * branch that doesn't do any calling and even then it's questionable.
8557 *
8558 * However, in case someone writes crazy wrong MC code and does register
8559 * updates before making calls, just use the regular register allocator to
8560 * ensure we get a register suitable for the intended argument number.
8561 */
8562 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8563
8564 /*
8565 * If there is already a register for the variable, we transfer/set the
8566 * guest shadow copy assignment to it.
8567 */
8568 uint8_t idxReg = pVar->idxReg;
8569 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8570 {
8571#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8572 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
8573 {
8574# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8575 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
8576 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
8577# endif
8578 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
8579 }
8580#endif
8581
8582 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8583 {
8584 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8585 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8586 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8587 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8588 }
8589 else
8590 {
8591 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8592 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8593 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8594 }
8595 /** @todo figure this one out. We need some way of making sure the register isn't
8596 * modified after this point, just in case we start writing crappy MC code. */
8597 pVar->enmGstReg = enmGstReg;
8598 pVar->fRegAcquired = true;
8599 return idxReg;
8600 }
8601 Assert(pVar->uArgNo == UINT8_MAX);
8602
8603 /*
8604 * Because this is supposed to be the commit stage, we just tag along with the
8605 * temporary register allocator and upgrade its allocation to a variable register.
8606 */
8607 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8608 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8609 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8610 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8611 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8612 pVar->idxReg = idxReg;
8613
8614 /*
8615 * Now we need to load the register value.
8616 */
8617 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8618 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8619 else
8620 {
8621 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8622 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8623 switch (pVar->cbVar)
8624 {
8625 case sizeof(uint64_t):
8626 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8627 break;
8628 case sizeof(uint32_t):
8629 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8630 break;
8631 case sizeof(uint16_t):
8632 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8633 break;
8634 case sizeof(uint8_t):
8635 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8636 break;
8637 default:
8638 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8639 }
8640 }
8641
8642 pVar->fRegAcquired = true;
8643 return idxReg;
8644}
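/*
 * Editorial note on the function above: if the variable already lives in a
 * host register, the guest shadow assignment for enmGstReg is transferred to
 * or set on that register.  Otherwise it piggybacks on the for-full-write
 * temporary allocator and loads the value from the immediate or the stack
 * slot, so the caller can emit the actual guest register store next.
 */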
8645
8646
8647/**
8648 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8649 *
8650 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8651 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8652 * requirement of flushing anything in volatile host registers when making a
8653 * call.
8654 *
8655 * @returns New @a off value.
8656 * @param pReNative The recompiler state.
8657 * @param off The code buffer position.
8658 * @param fHstRegsNotToSave Set of registers not to save & restore.
8659 */
8660DECL_HIDDEN_THROW(uint32_t)
8661iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8662{
8663 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8664 if (fHstRegs)
8665 {
8666 do
8667 {
8668 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8669 fHstRegs &= ~RT_BIT_32(idxHstReg);
8670
8671 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8672 {
8673 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8674 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8675 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8676 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8677 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8678 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8679 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8680 {
8681 case kIemNativeVarKind_Stack:
8682 {
8683 /* Temporarily spill the variable register. */
8684 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8685 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8686 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8687 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8688 continue;
8689 }
8690
8691 case kIemNativeVarKind_Immediate:
8692 case kIemNativeVarKind_VarRef:
8693 case kIemNativeVarKind_GstRegRef:
8694 /* It is weird to have any of these loaded at this point. */
8695 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8696 continue;
8697
8698 case kIemNativeVarKind_End:
8699 case kIemNativeVarKind_Invalid:
8700 break;
8701 }
8702 AssertFailed();
8703 }
8704 else
8705 {
8706 /*
8707 * Allocate a temporary stack slot and spill the register to it.
8708 */
8709 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8710 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8711 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8712 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8713 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8714 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8715 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8716 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8717 }
8718 } while (fHstRegs);
8719 }
8720#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8721
8722 /*
8723 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
8724 * allocated, which would be more difficult anyway as they span multiple stack slots and come
8725 * in different sizes (besides, we only have a limited amount of slots at the moment).
8726 *
8727 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
8728 * the callee. This asserts that the registers were written back earlier and are not in a dirty state.
8729 */
8730 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
8731
8732 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8733 if (fHstRegs)
8734 {
8735 do
8736 {
8737 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8738 fHstRegs &= ~RT_BIT_32(idxHstReg);
8739
8740 /* Fixed reserved and temporary registers don't need saving. */
8741 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
8742 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
8743 continue;
8744
8745 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8746
8747 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8748 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8749 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8750 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8751 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8752 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8753 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8754 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8755 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8756 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8757 {
8758 case kIemNativeVarKind_Stack:
8759 {
8760 /* Temporarily spill the variable register. */
8761 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8762 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8763 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8764 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8765 if (cbVar == sizeof(RTUINT128U))
8766 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8767 else
8768 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8769 continue;
8770 }
8771
8772 case kIemNativeVarKind_Immediate:
8773 case kIemNativeVarKind_VarRef:
8774 case kIemNativeVarKind_GstRegRef:
8775 /* It is weird to have any of these loaded at this point. */
8776 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8777 continue;
8778
8779 case kIemNativeVarKind_End:
8780 case kIemNativeVarKind_Invalid:
8781 break;
8782 }
8783 AssertFailed();
8784 } while (fHstRegs);
8785 }
8786#endif
8787 return off;
8788}
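/*
 * Editorial sketch of the bracketing around a TLB-miss helper call (the actual
 * call emission lives elsewhere; fHstRegsNotToSave depends on the caller):
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      // ... emit the helper call ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */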
8789
8790
8791/**
8792 * Emit code to restore volatile registers after a call to a helper.
8793 *
8794 * @returns New @a off value.
8795 * @param pReNative The recompiler state.
8796 * @param off The code buffer position.
8797 * @param fHstRegsNotToSave Set of registers not to save & restore.
8798 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8799 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8800 */
8801DECL_HIDDEN_THROW(uint32_t)
8802iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8803{
8804 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8805 if (fHstRegs)
8806 {
8807 do
8808 {
8809 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8810 fHstRegs &= ~RT_BIT_32(idxHstReg);
8811
8812 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8813 {
8814 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8815 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8816 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8817 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8818 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8819 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8820 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8821 {
8822 case kIemNativeVarKind_Stack:
8823 {
8824 /* Unspill the variable register. */
8825 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8826 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8827 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8828 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8829 continue;
8830 }
8831
8832 case kIemNativeVarKind_Immediate:
8833 case kIemNativeVarKind_VarRef:
8834 case kIemNativeVarKind_GstRegRef:
8835 /* It is weird to have any of these loaded at this point. */
8836 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8837 continue;
8838
8839 case kIemNativeVarKind_End:
8840 case kIemNativeVarKind_Invalid:
8841 break;
8842 }
8843 AssertFailed();
8844 }
8845 else
8846 {
8847 /*
8848 * Restore from temporary stack slot.
8849 */
8850 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8851 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8852 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8853 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8854
8855 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8856 }
8857 } while (fHstRegs);
8858 }
8859#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8860 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8861 if (fHstRegs)
8862 {
8863 do
8864 {
8865 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8866 fHstRegs &= ~RT_BIT_32(idxHstReg);
8867
8868 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8869 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8870 continue;
8871 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8872
8873 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8874 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8875 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8876 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8877 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8878 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8879 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8880 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8881 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8882 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8883 {
8884 case kIemNativeVarKind_Stack:
8885 {
8886 /* Unspill the variable register. */
8887 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8888 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8889 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8890 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8891
8892 if (cbVar == sizeof(RTUINT128U))
8893 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8894 else
8895 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8896 continue;
8897 }
8898
8899 case kIemNativeVarKind_Immediate:
8900 case kIemNativeVarKind_VarRef:
8901 case kIemNativeVarKind_GstRegRef:
8902 /* It is weird to have any of these loaded at this point. */
8903 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8904 continue;
8905
8906 case kIemNativeVarKind_End:
8907 case kIemNativeVarKind_Invalid:
8908 break;
8909 }
8910 AssertFailed();
8911 } while (fHstRegs);
8912 }
8913#endif
8914 return off;
8915}
8916
8917
8918/**
8919 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
8920 *
8921 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8922 *
8923 * ASSUMES that @a idxVar is valid and unpacked.
8924 */
8925DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8926{
8927 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8928 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8929 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8930 {
8931 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8932 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8933 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8934 Assert(cSlots > 0);
8935 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8936 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8937 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8938 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8939 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8940 }
8941 else
8942 Assert(idxStackSlot == UINT8_MAX);
8943}
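/*
 * Editorial example of the slot math above: a 32-byte RTUINT256U variable gives
 * cSlots = (32 + 7) / 8 = 4 and fAllocMask = 0xf, i.e. four consecutive qword
 * slots starting at idxStackSlot are released from bmStack.
 */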
8944
8945
8946/**
8947 * Worker that frees a single variable.
8948 *
8949 * ASSUMES that @a idxVar is valid and unpacked.
8950 */
8951DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8952{
8953 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8954 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8955 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8956
8957 /* Free the host register first if any assigned. */
8958 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8959#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8960 if ( idxHstReg != UINT8_MAX
8961 && pReNative->Core.aVars[idxVar].fSimdReg)
8962 {
8963 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8964 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8965 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8966 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8967 }
8968 else
8969#endif
8970 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8971 {
8972 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8973 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8974 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8975 }
8976
8977 /* Free argument mapping. */
8978 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8979 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8980 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8981
8982 /* Free the stack slots. */
8983 iemNativeVarFreeStackSlots(pReNative, idxVar);
8984
8985 /* Free the actual variable. */
8986 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8987 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8988}
8989
8990
8991/**
8992 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8993 */
8994DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8995{
8996 while (bmVars != 0)
8997 {
8998 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8999 bmVars &= ~RT_BIT_32(idxVar);
9000
9001#if 1 /** @todo optimize by simplifying this later... */
9002 iemNativeVarFreeOneWorker(pReNative, idxVar);
9003#else
9004 /* Only need to free the host register, the rest is done as bulk updates below. */
9005 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9006 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9007 {
9008 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9009 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9010 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9011 }
9012#endif
9013 }
9014#if 0 /** @todo optimize by simplifying this later... */
9015 pReNative->Core.bmVars = 0;
9016 pReNative->Core.bmStack = 0;
9017 pReNative->Core.u64ArgVars = UINT64_MAX;
9018#endif
9019}
9020
9021
9022
9023/*********************************************************************************************************************************
9024* Emitters for IEM_MC_CALL_CIMPL_XXX *
9025*********************************************************************************************************************************/
9026
9027/**
9028 * Emits code to load a reference to the given guest register into @a idxGprDst.
9029 */
9030DECL_HIDDEN_THROW(uint32_t)
9031iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
9032 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
9033{
9034#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9035 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
9036#endif
9037
9038 /*
9039 * Get the offset relative to the CPUMCTX structure.
9040 */
9041 uint32_t offCpumCtx;
9042 switch (enmClass)
9043 {
9044 case kIemNativeGstRegRef_Gpr:
9045 Assert(idxRegInClass < 16);
9046 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
9047 break;
9048
9049 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
9050 Assert(idxRegInClass < 4);
9051 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
9052 break;
9053
9054 case kIemNativeGstRegRef_EFlags:
9055 Assert(idxRegInClass == 0);
9056 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
9057 break;
9058
9059 case kIemNativeGstRegRef_MxCsr:
9060 Assert(idxRegInClass == 0);
9061 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
9062 break;
9063
9064 case kIemNativeGstRegRef_FpuReg:
9065 Assert(idxRegInClass < 8);
9066 AssertFailed(); /** @todo what kind of indexing? */
9067 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9068 break;
9069
9070 case kIemNativeGstRegRef_MReg:
9071 Assert(idxRegInClass < 8);
9072 AssertFailed(); /** @todo what kind of indexing? */
9073 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9074 break;
9075
9076 case kIemNativeGstRegRef_XReg:
9077 Assert(idxRegInClass < 16);
9078 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
9079 break;
9080
9081 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
9082 Assert(idxRegInClass == 0);
9083 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
9084 break;
9085
9086 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
9087 Assert(idxRegInClass == 0);
9088 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
9089 break;
9090
9091 default:
9092 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
9093 }
9094
9095 /*
9096 * Load the value into the destination register.
9097 */
9098#ifdef RT_ARCH_AMD64
9099 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
9100
9101#elif defined(RT_ARCH_ARM64)
9102 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9103 Assert(offCpumCtx < 4096);
9104 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
9105
9106#else
9107# error "Port me!"
9108#endif
9109
9110 return off;
9111}
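/*
 * Editorial example for the emitter above: kIemNativeGstRegRef_Gpr with
 * idxRegInClass = 3 (RBX) yields offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]);
 * on AMD64 the LEA is emitted relative to pVCpu (cpum.GstCtx), while on ARM64 it
 * is an ADD of the sub-4KiB offset to IEMNATIVE_REG_FIXED_PCPUMCTX.
 */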
9112
9113
9114/**
9115 * Common code for CIMPL and AIMPL calls.
9116 *
9117 * These are calls that use argument variables and such. They should not be
9118 * confused with internal calls required to implement an MC operation,
9119 * like a TLB load and similar.
9120 *
9121 * Upon return all that is left to do is to load any hidden arguments and
9122 * perform the call. All argument variables are freed.
9123 *
9124 * @returns New code buffer offset; throws VBox status code on error.
9125 * @param pReNative The native recompile state.
9126 * @param off The code buffer offset.
9127 * @param cArgs The total number of arguments (includes hidden
9128 * count).
9129 * @param cHiddenArgs The number of hidden arguments. The hidden
9130 * arguments must not have any variable declared for
9131 * them, whereas all the regular arguments must
9132 * (tstIEMCheckMc ensures this).
9133 * @param fFlushPendingWrites Flag whether to flush pending writes (default true);
9134 * pending writes in call-volatile registers are still flushed even if false.
9135 */
9136DECL_HIDDEN_THROW(uint32_t)
9137iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
9138 bool fFlushPendingWrites /*= true*/)
9139{
9140#ifdef VBOX_STRICT
9141 /*
9142 * Assert sanity.
9143 */
9144 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
9145 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
9146 for (unsigned i = 0; i < cHiddenArgs; i++)
9147 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
9148 for (unsigned i = cHiddenArgs; i < cArgs; i++)
9149 {
9150 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
9151 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
9152 }
9153 iemNativeRegAssertSanity(pReNative);
9154#endif
9155
9156 /* We don't know what the called function makes use of, so flush any pending register writes. */
9157 RT_NOREF(fFlushPendingWrites);
9158#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9159 if (fFlushPendingWrites)
9160#endif
9161 off = iemNativeRegFlushPendingWrites(pReNative, off);
9162
9163 /*
9164 * Before we do anything else, go over variables that are referenced and
9165 * make sure they are not in a register.
9166 */
9167 uint32_t bmVars = pReNative->Core.bmVars;
9168 if (bmVars)
9169 {
9170 do
9171 {
9172 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9173 bmVars &= ~RT_BIT_32(idxVar);
9174
9175 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
9176 {
9177 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
9178#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9179 if ( idxRegOld != UINT8_MAX
9180 && pReNative->Core.aVars[idxVar].fSimdReg)
9181 {
9182 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9183 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
9184
9185 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9186 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9187 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9188 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9189 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
9190 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9191 else
9192 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9193
9194 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
9195 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
9196
9197 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9198 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
9199 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9200 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
9201 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
9202 }
9203 else
9204#endif
9205 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
9206 {
9207 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9208 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9209 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9210 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9211 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9212
9213 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9214 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
9215 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9216 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
9217 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
9218 }
9219 }
9220 } while (bmVars != 0);
9221#if 0 //def VBOX_STRICT
9222 iemNativeRegAssertSanity(pReNative);
9223#endif
9224 }
9225
9226 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
9227
9228#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9229 /*
9230 * As the very first step, go over the host registers that will be used for arguments
9231 * and make sure they don't shadow anything which needs writing back first.
9232 */
9233 for (uint32_t i = 0; i < cRegArgs; i++)
9234 {
9235 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9236
9237 /* Writeback any dirty guest shadows before using this register. */
9238 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
9239 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
9240 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
9241 }
9242#endif
9243
9244 /*
9245 * First, go over the host registers that will be used for arguments and make
9246 * sure they either hold the desired argument or are free.
9247 */
9248 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
9249 {
9250 for (uint32_t i = 0; i < cRegArgs; i++)
9251 {
9252 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9253 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9254 {
9255 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
9256 {
9257 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
9258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9259 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9260 Assert(pVar->idxReg == idxArgReg);
9261 uint8_t const uArgNo = pVar->uArgNo;
9262 if (uArgNo == i)
9263 { /* perfect */ }
9264 /* The variable allocator logic should make sure this is impossible,
9265 except for when the return register is used as a parameter (ARM,
9266 but not x86). */
9267#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
9268 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
9269 {
9270# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9271# error "Implement this"
9272# endif
9273 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
9274 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
9275 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
9276 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9277 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
9278 }
9279#endif
9280 else
9281 {
9282 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9283
9284 if (pVar->enmKind == kIemNativeVarKind_Stack)
9285 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
9286 else
9287 {
9288 /* just free it, can be reloaded if used again */
9289 pVar->idxReg = UINT8_MAX;
9290 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
9291 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
9292 }
9293 }
9294 }
9295 else
9296 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
9297 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
9298 }
9299 }
9300#if 0 //def VBOX_STRICT
9301 iemNativeRegAssertSanity(pReNative);
9302#endif
9303 }
9304
9305 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
9306
9307#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9308 /*
9309 * If there are any stack arguments, make sure they are in their place as well.
9310 *
9311 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
9312 * the caller) will be loading it later and it must be free (see the first loop).
9313 */
9314 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
9315 {
9316 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
9317 {
9318 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9319 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
9320 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9321 {
9322 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
9323 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
9324 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
9325 pVar->idxReg = UINT8_MAX;
9326 }
9327 else
9328 {
9329 /* Use ARG0 as temp for stuff we need registers for. */
9330 switch (pVar->enmKind)
9331 {
9332 case kIemNativeVarKind_Stack:
9333 {
9334 uint8_t const idxStackSlot = pVar->idxStackSlot;
9335 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9336 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
9337 iemNativeStackCalcBpDisp(idxStackSlot));
9338 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9339 continue;
9340 }
9341
9342 case kIemNativeVarKind_Immediate:
9343 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
9344 continue;
9345
9346 case kIemNativeVarKind_VarRef:
9347 {
9348 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9349 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9350 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9351 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9352 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9353# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9354 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
9355 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
9356 if ( fSimdReg
9357 && idxRegOther != UINT8_MAX)
9358 {
9359 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9360 if (cbVar == sizeof(RTUINT128U))
9361 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
9362 else
9363 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
9364 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9365 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9366 }
9367 else
9368# endif
9369 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9370 {
9371 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9372 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9373 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9374 }
9375 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9376 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9377 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
9378 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9379 continue;
9380 }
9381
9382 case kIemNativeVarKind_GstRegRef:
9383 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
9384 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9385 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9386 continue;
9387
9388 case kIemNativeVarKind_Invalid:
9389 case kIemNativeVarKind_End:
9390 break;
9391 }
9392 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9393 }
9394 }
9395# if 0 //def VBOX_STRICT
9396 iemNativeRegAssertSanity(pReNative);
9397# endif
9398 }
9399#else
9400 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
9401#endif
9402
9403 /*
9404 * Make sure the argument variables are loaded into their respective registers.
9405 *
9406 * We can optimize this by ASSUMING that any register allocations are for
9407 * registers that have already been loaded and are ready. The previous step
9408 * saw to that.
9409 */
9410 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
9411 {
9412 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9413 {
9414 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9415 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9416 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
9417 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
9418 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
9419 else
9420 {
9421 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9422 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9423 {
9424 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9425 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
9426 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
9427 | RT_BIT_32(idxArgReg);
9428 pVar->idxReg = idxArgReg;
9429 }
9430 else
9431 {
9432 /* Use ARG0 as temp for stuff we need registers for. */
9433 switch (pVar->enmKind)
9434 {
9435 case kIemNativeVarKind_Stack:
9436 {
9437 uint8_t const idxStackSlot = pVar->idxStackSlot;
9438 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9439 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
9440 continue;
9441 }
9442
9443 case kIemNativeVarKind_Immediate:
9444 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
9445 continue;
9446
9447 case kIemNativeVarKind_VarRef:
9448 {
9449 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9450 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9451 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
9452 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9453 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9454 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9455#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9456 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
9457 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
9458 if ( fSimdReg
9459 && idxRegOther != UINT8_MAX)
9460 {
9461 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9462 if (cbVar == sizeof(RTUINT128U))
9463 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
9464 else
9465 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
9466 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9467 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9468 }
9469 else
9470#endif
9471 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9472 {
9473 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9474 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9475 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9476 }
9477 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9478 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9479 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
9480 continue;
9481 }
9482
9483 case kIemNativeVarKind_GstRegRef:
9484 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
9485 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9486 continue;
9487
9488 case kIemNativeVarKind_Invalid:
9489 case kIemNativeVarKind_End:
9490 break;
9491 }
9492 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9493 }
9494 }
9495 }
9496#if 0 //def VBOX_STRICT
9497 iemNativeRegAssertSanity(pReNative);
9498#endif
9499 }
9500#ifdef VBOX_STRICT
9501 else
9502 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9503 {
9504 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
9505 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
9506 }
9507#endif
9508
9509 /*
9510 * Free all argument variables (simplified).
9511 * Their lifetime always expires with the call they are for.
9512 */
9513 /** @todo Make the python script check that arguments aren't used after
9514 * IEM_MC_CALL_XXXX. */
9515 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
9516 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
9517 * an argument value. There is also some FPU stuff. */
9518 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9519 {
9520 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9521 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9522
9523 /* no need to free registers: */
9524 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9525 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9526 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9527 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9528 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9529 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9530
9531 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9532 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9533 iemNativeVarFreeStackSlots(pReNative, idxVar);
9534 }
9535 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9536
9537 /*
9538 * Flush volatile registers as we make the call.
9539 */
9540 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9541
9542 return off;
9543}
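/*
 * Editorial sketch of a hypothetical caller: a CIMPL/AIMPL emitter first lets
 * iemNativeEmitCallCommon arrange the visible arguments, then loads the hidden
 * ones and performs the call itself:
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs, cHiddenArgs);
 *      // ... load hidden arguments (e.g. pVCpu) into the first call registers ...
 *      // ... emit the actual call instruction ...
 */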
9544
9545
9546
9547/*********************************************************************************************************************************
9548* TLB Lookup. *
9549*********************************************************************************************************************************/
9550
9551/**
9552 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
9553 */
9554DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
9555{
9556 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
9557 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
9558 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
9559 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
9560
9561 /* Do the lookup manually. */
9562 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
9563 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
9564 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
9565 if (RT_LIKELY(pTlbe->uTag == uTag))
9566 {
9567 /*
9568 * Check TLB page table level access flags.
9569 */
9570 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
9571 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
9572 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
9573 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
9574 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
9575 | IEMTLBE_F_PG_UNASSIGNED
9576 | IEMTLBE_F_PT_NO_ACCESSED
9577 | fNoWriteNoDirty | fNoUser);
9578 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
9579 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
9580 {
9581 /*
9582 * Return the address.
9583 */
9584 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
9585 if ((uintptr_t)pbAddr == uResult)
9586 return;
9587 RT_NOREF(cbMem);
9588 AssertFailed();
9589 }
9590 else
9591 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
9592 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
9593 }
9594 else
9595 AssertFailed();
9596 RT_BREAKPOINT();
9597}
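/*
 * Editorial note: the manual lookup above mirrors the TLB lookup code the
 * recompiler emits (tag compare, then the combined flags/physical-revision
 * compare); any disagreement between the emitted result and this re-check
 * trips the assertions and the RT_BREAKPOINT above.
 */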
9598
9599/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
9600
9601
9602
9603/*********************************************************************************************************************************
9604* Recompiler Core. *
9605*********************************************************************************************************************************/
9606
9607/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
9608static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
9609{
9610 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
9611 pDis->cbCachedInstr += cbMaxRead;
9612 RT_NOREF(cbMinRead);
9613 return VERR_NO_DATA;
9614}
9615
9616
9617DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
9618{
9619 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
9620 {
9621#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
9622 ENTRY(fLocalForcedActions),
9623 ENTRY(iem.s.rcPassUp),
9624 ENTRY(iem.s.fExec),
9625 ENTRY(iem.s.pbInstrBuf),
9626 ENTRY(iem.s.uInstrBufPc),
9627 ENTRY(iem.s.GCPhysInstrBuf),
9628 ENTRY(iem.s.cbInstrBufTotal),
9629 ENTRY(iem.s.idxTbCurInstr),
9630#ifdef VBOX_WITH_STATISTICS
9631 ENTRY(iem.s.StatNativeTlbHitsForFetch),
9632 ENTRY(iem.s.StatNativeTlbHitsForStore),
9633 ENTRY(iem.s.StatNativeTlbHitsForStack),
9634 ENTRY(iem.s.StatNativeTlbHitsForMapped),
9635 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
9636 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
9637 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
9638 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
9639#endif
9640 ENTRY(iem.s.DataTlb.aEntries),
9641 ENTRY(iem.s.DataTlb.uTlbRevision),
9642 ENTRY(iem.s.DataTlb.uTlbPhysRev),
9643 ENTRY(iem.s.DataTlb.cTlbHits),
9644 ENTRY(iem.s.CodeTlb.aEntries),
9645 ENTRY(iem.s.CodeTlb.uTlbRevision),
9646 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
9647 ENTRY(iem.s.CodeTlb.cTlbHits),
9648 ENTRY(pVMR3),
9649 ENTRY(cpum.GstCtx.rax),
9650 ENTRY(cpum.GstCtx.ah),
9651 ENTRY(cpum.GstCtx.rcx),
9652 ENTRY(cpum.GstCtx.ch),
9653 ENTRY(cpum.GstCtx.rdx),
9654 ENTRY(cpum.GstCtx.dh),
9655 ENTRY(cpum.GstCtx.rbx),
9656 ENTRY(cpum.GstCtx.bh),
9657 ENTRY(cpum.GstCtx.rsp),
9658 ENTRY(cpum.GstCtx.rbp),
9659 ENTRY(cpum.GstCtx.rsi),
9660 ENTRY(cpum.GstCtx.rdi),
9661 ENTRY(cpum.GstCtx.r8),
9662 ENTRY(cpum.GstCtx.r9),
9663 ENTRY(cpum.GstCtx.r10),
9664 ENTRY(cpum.GstCtx.r11),
9665 ENTRY(cpum.GstCtx.r12),
9666 ENTRY(cpum.GstCtx.r13),
9667 ENTRY(cpum.GstCtx.r14),
9668 ENTRY(cpum.GstCtx.r15),
9669 ENTRY(cpum.GstCtx.es.Sel),
9670 ENTRY(cpum.GstCtx.es.u64Base),
9671 ENTRY(cpum.GstCtx.es.u32Limit),
9672 ENTRY(cpum.GstCtx.es.Attr),
9673 ENTRY(cpum.GstCtx.cs.Sel),
9674 ENTRY(cpum.GstCtx.cs.u64Base),
9675 ENTRY(cpum.GstCtx.cs.u32Limit),
9676 ENTRY(cpum.GstCtx.cs.Attr),
9677 ENTRY(cpum.GstCtx.ss.Sel),
9678 ENTRY(cpum.GstCtx.ss.u64Base),
9679 ENTRY(cpum.GstCtx.ss.u32Limit),
9680 ENTRY(cpum.GstCtx.ss.Attr),
9681 ENTRY(cpum.GstCtx.ds.Sel),
9682 ENTRY(cpum.GstCtx.ds.u64Base),
9683 ENTRY(cpum.GstCtx.ds.u32Limit),
9684 ENTRY(cpum.GstCtx.ds.Attr),
9685 ENTRY(cpum.GstCtx.fs.Sel),
9686 ENTRY(cpum.GstCtx.fs.u64Base),
9687 ENTRY(cpum.GstCtx.fs.u32Limit),
9688 ENTRY(cpum.GstCtx.fs.Attr),
9689 ENTRY(cpum.GstCtx.gs.Sel),
9690 ENTRY(cpum.GstCtx.gs.u64Base),
9691 ENTRY(cpum.GstCtx.gs.u32Limit),
9692 ENTRY(cpum.GstCtx.gs.Attr),
9693 ENTRY(cpum.GstCtx.rip),
9694 ENTRY(cpum.GstCtx.eflags),
9695 ENTRY(cpum.GstCtx.uRipInhibitInt),
9696 ENTRY(cpum.GstCtx.cr0),
9697 ENTRY(cpum.GstCtx.cr4),
9698 ENTRY(cpum.GstCtx.aXcr[0]),
9699 ENTRY(cpum.GstCtx.aXcr[1]),
9700#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9701 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
9702 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
9703 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
9704 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
9705 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
9706 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
9707 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
9708 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
9709 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
9710 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
9711 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
9712 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
9713 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
9714 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
9715 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
9716 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
9717 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
9718 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
9719 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
9720 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
9721 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
9722 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9723 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9724 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9725 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9726 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9727 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9728 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9729 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9730 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9731 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9732 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9733#endif
9734#undef ENTRY
9735 };
9736#ifdef VBOX_STRICT
9737 static bool s_fOrderChecked = false;
9738 if (!s_fOrderChecked)
9739 {
9740 s_fOrderChecked = true;
9741 uint32_t offPrev = s_aMembers[0].off;
9742 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9743 {
9744 Assert(s_aMembers[i].off > offPrev);
9745 offPrev = s_aMembers[i].off;
9746 }
9747 }
9748#endif
9749
9750 /*
9751 * Binary lookup.
9752 */
9753 unsigned iStart = 0;
9754 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9755 for (;;)
9756 {
9757 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9758 uint32_t const offCur = s_aMembers[iCur].off;
9759 if (off < offCur)
9760 {
9761 if (iCur != iStart)
9762 iEnd = iCur;
9763 else
9764 break;
9765 }
9766 else if (off > offCur)
9767 {
9768 if (iCur + 1 < iEnd)
9769 iStart = iCur + 1;
9770 else
9771 break;
9772 }
9773 else
9774 return s_aMembers[iCur].pszName;
9775 }
9776#ifdef VBOX_WITH_STATISTICS
9777 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9778 return "iem.s.acThreadedFuncStats[iFn]";
9779#endif
9780 return NULL;
9781}
9782
9783
9784DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9785{
9786 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9787#if defined(RT_ARCH_AMD64)
9788 static const char * const a_apszMarkers[] =
9789 {
9790 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9791 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9792 };
9793#endif
9794
9795 char szDisBuf[512];
9796 DISSTATE Dis;
9797 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9798 uint32_t const cNative = pTb->Native.cInstructions;
9799 uint32_t offNative = 0;
9800#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9801 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9802#endif
9803 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9804 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9805 : DISCPUMODE_64BIT;
9806#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9807 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9808#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9809 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9810#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9811# error "Port me"
9812#else
9813 csh hDisasm = ~(size_t)0;
9814# if defined(RT_ARCH_AMD64)
9815 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9816# elif defined(RT_ARCH_ARM64)
9817 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9818# else
9819# error "Port me"
9820# endif
9821 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9822
9823 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9824 //Assert(rcCs == CS_ERR_OK);
9825#endif
9826
9827 /*
9828 * Print TB info.
9829 */
9830 pHlp->pfnPrintf(pHlp,
9831 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9832 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9833 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9834 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9835#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9836 if (pDbgInfo && pDbgInfo->cEntries > 1)
9837 {
9838 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9839
9840 /*
9841 * This disassembly is driven by the debug info which follows the native
9842 * code and indicates where the next guest instruction starts, where
9843 * labels are, and so on.
9844 */
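 /* Rough shape of the loop below: each kIemTbDbgEntryType_NativeOffset entry gives the native
    offset at which the following debug entries apply; once offNative reaches that point, the
    pending GuestInstruction / ThreadedCall / Label / ... entries are printed before native
    disassembly resumes. */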
9845 uint32_t idxThreadedCall = 0;
9846 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9847 uint8_t idxRange = UINT8_MAX;
9848 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9849 uint32_t offRange = 0;
9850 uint32_t offOpcodes = 0;
9851 uint32_t const cbOpcodes = pTb->cbOpcodes;
9852 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9853 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9854 uint32_t iDbgEntry = 1;
9855 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9856
9857 while (offNative < cNative)
9858 {
9859 /* If we're at or have passed the point where the next chunk of debug
9860 info starts, process it. */
9861 if (offDbgNativeNext <= offNative)
9862 {
9863 offDbgNativeNext = UINT32_MAX;
9864 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9865 {
9866 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9867 {
9868 case kIemTbDbgEntryType_GuestInstruction:
9869 {
9870 /* Did the exec flag change? */
9871 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9872 {
9873 pHlp->pfnPrintf(pHlp,
9874 " fExec change %#08x -> %#08x %s\n",
9875 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9876 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9877 szDisBuf, sizeof(szDisBuf)));
9878 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9879 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9880 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9881 : DISCPUMODE_64BIT;
9882 }
9883
9884 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9885 where the compilation was aborted before the opcode was recorded and the actual
9886 instruction was translated to a threaded call. This may happen when we run out
9887 of ranges, or when some complicated interrupts/FFs are found to be pending or
9888 similar. So, we just deal with it here rather than in the compiler code as it
9889 is a lot simpler to do here. */
9890 if ( idxRange == UINT8_MAX
9891 || idxRange >= cRanges
9892 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9893 {
9894 idxRange += 1;
9895 if (idxRange < cRanges)
9896 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9897 else
9898 continue;
9899 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9900 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9901 + (pTb->aRanges[idxRange].idxPhysPage == 0
9902 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9903 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9904 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9905 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9906 pTb->aRanges[idxRange].idxPhysPage);
9907 GCPhysPc += offRange;
9908 }
9909
9910 /* Disassemble the instruction. */
9911 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9912 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9913 uint32_t cbInstr = 1;
9914 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9915 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9916 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9917 if (RT_SUCCESS(rc))
9918 {
9919 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9920 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9921 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9922 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9923
9924 static unsigned const s_offMarker = 55;
9925 static char const s_szMarker[] = " ; <--- guest";
9926 if (cch < s_offMarker)
9927 {
9928 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9929 cch = s_offMarker;
9930 }
9931 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9932 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9933
9934 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9935 }
9936 else
9937 {
9938 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9939 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9940 cbInstr = 1;
9941 }
9942 GCPhysPc += cbInstr;
9943 offOpcodes += cbInstr;
9944 offRange += cbInstr;
9945 continue;
9946 }
9947
9948 case kIemTbDbgEntryType_ThreadedCall:
9949 pHlp->pfnPrintf(pHlp,
9950 " Call #%u to %s (%u args) - %s\n",
9951 idxThreadedCall,
9952 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9953 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9954 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9955 idxThreadedCall++;
9956 continue;
9957
9958 case kIemTbDbgEntryType_GuestRegShadowing:
9959 {
9960 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9961 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9962 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9963 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9964 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9965 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9966 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9967 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9968 else
9969 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9970 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9971 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9972 continue;
9973 }
9974
9975#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9976 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9977 {
9978 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9979 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9980 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9981 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9982 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9983 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9984 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9985 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9986 else
9987 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9988 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9989 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9990 continue;
9991 }
9992#endif
9993
9994 case kIemTbDbgEntryType_Label:
9995 {
9996 const char *pszName = "what_the_fudge";
9997 const char *pszComment = "";
9998 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9999 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
10000 {
10001 case kIemNativeLabelType_Return: pszName = "Return"; break;
10002 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
10003 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
10004 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
10005 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
10006 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
10007 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
10008 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
10009 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
10010 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
10011 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
10012 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
10013 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
10014 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
10015 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
10016 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
10017 case kIemNativeLabelType_If:
10018 pszName = "If";
10019 fNumbered = true;
10020 break;
10021 case kIemNativeLabelType_Else:
10022 pszName = "Else";
10023 fNumbered = true;
10024 pszComment = " ; regs state restored pre-if-block";
10025 break;
10026 case kIemNativeLabelType_Endif:
10027 pszName = "Endif";
10028 fNumbered = true;
10029 break;
10030 case kIemNativeLabelType_CheckIrq:
10031 pszName = "CheckIrq_CheckVM";
10032 fNumbered = true;
10033 break;
10034 case kIemNativeLabelType_TlbLookup:
10035 pszName = "TlbLookup";
10036 fNumbered = true;
10037 break;
10038 case kIemNativeLabelType_TlbMiss:
10039 pszName = "TlbMiss";
10040 fNumbered = true;
10041 break;
10042 case kIemNativeLabelType_TlbDone:
10043 pszName = "TlbDone";
10044 fNumbered = true;
10045 break;
10046 case kIemNativeLabelType_Invalid:
10047 case kIemNativeLabelType_End:
10048 break;
10049 }
10050 if (fNumbered)
10051 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
10052 else
10053 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
10054 continue;
10055 }
10056
10057 case kIemTbDbgEntryType_NativeOffset:
10058 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
10059 Assert(offDbgNativeNext >= offNative);
10060 break;
10061
10062#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
10063 case kIemTbDbgEntryType_DelayedPcUpdate:
10064 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
10065 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
10066 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
10067 continue;
10068#endif
10069
10070#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10071 case kIemTbDbgEntryType_GuestRegDirty:
10072 {
10073 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
10074 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
10075 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
10076 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
10077 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
10078 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
10079 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
10080 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
10081 pszGstReg, pszHstReg);
10082 continue;
10083 }
10084
10085 case kIemTbDbgEntryType_GuestRegWriteback:
10086 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
10087 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
10088 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg);
10089 continue;
10090#endif
10091
10092 default:
10093 AssertFailed();
10094 }
10095 iDbgEntry++;
10096 break;
10097 }
10098 }
10099
10100 /*
10101 * Disassemble the next native instruction.
10102 */
10103 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10104# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10105 uint32_t cbInstr = sizeof(paNative[0]);
10106 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10107 if (RT_SUCCESS(rc))
10108 {
10109# if defined(RT_ARCH_AMD64)
10110 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10111 {
10112 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10113 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10114 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
10115 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10116 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10117 uInfo & 0x8000 ? "recompiled" : "todo");
10118 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
10119 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
10120 else
10121 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10122 }
10123 else
10124# endif
10125 {
10126 const char *pszAnnotation = NULL;
10127# ifdef RT_ARCH_AMD64
10128 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10129 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10130 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10131 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10132 PCDISOPPARAM pMemOp;
10133 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
10134 pMemOp = &Dis.Param1;
10135 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
10136 pMemOp = &Dis.Param2;
10137 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
10138 pMemOp = &Dis.Param3;
10139 else
10140 pMemOp = NULL;
10141 if ( pMemOp
10142 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
10143 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
10144 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
10145 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
10146
10147#elif defined(RT_ARCH_ARM64)
10148 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10149 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10150 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10151# else
10152# error "Port me"
10153# endif
10154 if (pszAnnotation)
10155 {
10156 static unsigned const s_offAnnotation = 55;
10157 size_t const cchAnnotation = strlen(pszAnnotation);
10158 size_t cchDis = strlen(szDisBuf);
10159 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
10160 {
10161 if (cchDis < s_offAnnotation)
10162 {
10163 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
10164 cchDis = s_offAnnotation;
10165 }
10166 szDisBuf[cchDis++] = ' ';
10167 szDisBuf[cchDis++] = ';';
10168 szDisBuf[cchDis++] = ' ';
10169 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
10170 }
10171 }
10172 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10173 }
10174 }
10175 else
10176 {
10177# if defined(RT_ARCH_AMD64)
10178 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10179 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10180# elif defined(RT_ARCH_ARM64)
10181 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10182# else
10183# error "Port me"
10184# endif
10185 cbInstr = sizeof(paNative[0]);
10186 }
10187 offNative += cbInstr / sizeof(paNative[0]);
10188
10189# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10190 cs_insn *pInstr;
10191 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10192 (uintptr_t)pNativeCur, 1, &pInstr);
10193 if (cInstrs > 0)
10194 {
10195 Assert(cInstrs == 1);
10196 const char *pszAnnotation = NULL;
10197# if defined(RT_ARCH_ARM64)
10198 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
10199 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
10200 {
10201 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
10202 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
10203 char *psz = strchr(pInstr->op_str, '[');
10204 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
10205 {
10206 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
10207 int32_t off = -1;
10208 psz += 4;
10209 if (*psz == ']')
10210 off = 0;
10211 else if (*psz == ',')
10212 {
10213 psz = RTStrStripL(psz + 1);
10214 if (*psz == '#')
10215 off = RTStrToInt32(&psz[1]);
10216 /** @todo deal with index registers and LSL as well... */
10217 }
10218 if (off >= 0)
10219 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
10220 }
10221 }
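 /* Illustrative example (made-up offset): for an op_str ending in "[x28, #0x100]" the code above
    strips the base register and immediate and asks iemNativeDbgVCpuOffsetToName(0x100) for a
    field name; x27-based addresses get the cpum.GstCtx offset added first. */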
10222# endif
10223
10224 size_t const cchOp = strlen(pInstr->op_str);
10225# if defined(RT_ARCH_AMD64)
10226 if (pszAnnotation)
10227 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
10228 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
10229 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
10230 else
10231 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10232 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10233
10234# else
10235 if (pszAnnotation)
10236 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
10237 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
10238 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
10239 else
10240 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10241 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10242# endif
10243 offNative += pInstr->size / sizeof(*pNativeCur);
10244 cs_free(pInstr, cInstrs);
10245 }
10246 else
10247 {
10248# if defined(RT_ARCH_AMD64)
10249 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10250 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
10251# else
10252 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10253# endif
10254 offNative++;
10255 }
10256# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10257 }
10258 }
10259 else
10260#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
10261 {
10262 /*
10263 * No debug info, just disassemble the x86 code and then the native code.
10264 *
10265 * First the guest code:
10266 */
10267 for (unsigned i = 0; i < pTb->cRanges; i++)
10268 {
10269 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
10270 + (pTb->aRanges[i].idxPhysPage == 0
10271 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
10272 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
10273 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
10274 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
10275 unsigned off = pTb->aRanges[i].offOpcodes;
10276 /** @todo this ain't working when crossing pages! */
10277 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
10278 while (off < cbOpcodes)
10279 {
10280 uint32_t cbInstr = 1;
10281 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
10282 &pTb->pabOpcodes[off], cbOpcodes - off,
10283 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
10284 if (RT_SUCCESS(rc))
10285 {
10286 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10287 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10288 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10289 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10290 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
10291 GCPhysPc += cbInstr;
10292 off += cbInstr;
10293 }
10294 else
10295 {
10296 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
10297 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
10298 break;
10299 }
10300 }
10301 }
10302
10303 /*
10304 * Then the native code:
10305 */
10306 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
10307 while (offNative < cNative)
10308 {
10309 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10310# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10311 uint32_t cbInstr = sizeof(paNative[0]);
10312 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10313 if (RT_SUCCESS(rc))
10314 {
10315# if defined(RT_ARCH_AMD64)
10316 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10317 {
10318 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10319 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10320 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
10321 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10322 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10323 uInfo & 0x8000 ? "recompiled" : "todo");
10324 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
10325 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
10326 else
10327 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10328 }
10329 else
10330# endif
10331 {
10332# ifdef RT_ARCH_AMD64
10333 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10334 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10335 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10336 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10337# elif defined(RT_ARCH_ARM64)
10338 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10339 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10340 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10341# else
10342# error "Port me"
10343# endif
10344 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10345 }
10346 }
10347 else
10348 {
10349# if defined(RT_ARCH_AMD64)
10350 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10351 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10352# else
10353 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10354# endif
10355 cbInstr = sizeof(paNative[0]);
10356 }
10357 offNative += cbInstr / sizeof(paNative[0]);
10358
10359# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10360 cs_insn *pInstr;
10361 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10362 (uintptr_t)pNativeCur, 1, &pInstr);
10363 if (cInstrs > 0)
10364 {
10365 Assert(cInstrs == 1);
10366# if defined(RT_ARCH_AMD64)
10367 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10368 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10369# else
10370 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10371 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10372# endif
10373 offNative += pInstr->size / sizeof(*pNativeCur);
10374 cs_free(pInstr, cInstrs);
10375 }
10376 else
10377 {
10378# if defined(RT_ARCH_AMD64)
10379 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10380 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
10381# else
10382 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10383# endif
10384 offNative++;
10385 }
10386# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10387 }
10388 }
10389
10390#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10391 /* Cleanup. */
10392 cs_close(&hDisasm);
10393#endif
10394}
10395
10396
10397/**
10398 * Recompiles the given threaded TB into a native one.
10399 *
10400 * In case of failure the translation block will be returned as-is.
10401 *
10402 * @returns pTb.
10403 * @param pVCpu The cross context virtual CPU structure of the calling
10404 * thread.
10405 * @param pTb The threaded translation to recompile to native.
10406 */
10407DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10408{
10409 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10410
10411 /*
10412 * The first time thru, we allocate the recompiler state, the other times
10413 * we just need to reset it before using it again.
10414 */
10415 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10416 if (RT_LIKELY(pReNative))
10417 iemNativeReInit(pReNative, pTb);
10418 else
10419 {
10420 pReNative = iemNativeInit(pVCpu, pTb);
10421 AssertReturn(pReNative, pTb);
10422 }
10423
10424#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10425 /*
10426 * First do liveness analysis. This is done backwards.
10427 */
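 /* Sketch of the pass below: the last entry starts out as 'all unused'; walking the calls
    backwards, each liveness function derives paLivenessEntries[idxCall - 1] (the state before
    the call) from paLivenessEntries[idxCall] (the state after it), with the generic
    IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL fallback when no per-function handler exists. */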
10428 {
10429 uint32_t idxCall = pTb->Thrd.cCalls;
10430 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10431 { /* likely */ }
10432 else
10433 {
10434 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10435 while (idxCall > cAlloc)
10436 cAlloc *= 2;
10437 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10438 AssertReturn(pvNew, pTb);
10439 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10440 pReNative->cLivenessEntriesAlloc = cAlloc;
10441 }
10442 AssertReturn(idxCall > 0, pTb);
10443 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10444
10445 /* The initial (final) entry. */
10446 idxCall--;
10447 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10448
10449 /* Loop backwards thru the calls and fill in the other entries. */
10450 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10451 while (idxCall > 0)
10452 {
10453 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10454 if (pfnLiveness)
10455 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10456 else
10457 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
10458 pCallEntry--;
10459 idxCall--;
10460 }
10461
10462# ifdef VBOX_WITH_STATISTICS
10463 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
10464 to 'clobbered' rather than 'input'. */
10465 /** @todo */
10466# endif
10467 }
10468#endif
10469
10470 /*
10471 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10472 * for aborting if an error happens.
10473 */
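 /* The emitters below report failure by throwing / longjmp'ing a VBox status code; that unwinds
    into the IEMNATIVE_CATCH_LONGJMP_BEGIN block further down, which logs the status and returns
    the threaded pTb unchanged. */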
10474 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10475#ifdef LOG_ENABLED
10476 uint32_t const cCallsOrg = cCallsLeft;
10477#endif
10478 uint32_t off = 0;
10479 int rc = VINF_SUCCESS;
10480 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10481 {
10482 /*
10483 * Emit prolog code (fixed).
10484 */
10485 off = iemNativeEmitProlog(pReNative, off);
10486
10487 /*
10488 * Convert the calls to native code.
10489 */
10490#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10491 int32_t iGstInstr = -1;
10492#endif
10493#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10494 uint32_t cThreadedCalls = 0;
10495 uint32_t cRecompiledCalls = 0;
10496#endif
10497#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10498 uint32_t idxCurCall = 0;
10499#endif
10500 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10501 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10502 while (cCallsLeft-- > 0)
10503 {
10504 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10505#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10506 pReNative->idxCurCall = idxCurCall;
10507#endif
10508
10509 /*
10510 * Debug info, assembly markup and statistics.
10511 */
10512#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10513 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10514 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10515#endif
10516#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10517 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10518 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10519 {
10520 if (iGstInstr < (int32_t)pTb->cInstructions)
10521 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10522 else
10523 Assert(iGstInstr == pTb->cInstructions);
10524 iGstInstr = pCallEntry->idxInstr;
10525 }
10526 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10527#endif
10528#if defined(VBOX_STRICT)
10529 off = iemNativeEmitMarker(pReNative, off,
10530 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
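 /* Marker payload layout, mirroring the decode in iemNativeDisassembleTb: low 15 bits = call
    index, bit 15 = 'recompiled' flag, high 16 bits = threaded function number. */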
10531#endif
10532#if defined(VBOX_STRICT)
10533 iemNativeRegAssertSanity(pReNative);
10534#endif
10535#ifdef VBOX_WITH_STATISTICS
10536 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10537#endif
10538
10539 /*
10540 * Actual work.
10541 */
10542 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
10543 pfnRecom ? "(recompiled)" : "(todo)"));
10544 if (pfnRecom) /** @todo stats on this. */
10545 {
10546 off = pfnRecom(pReNative, off, pCallEntry);
10547 STAM_REL_STATS({cRecompiledCalls++;});
10548 }
10549 else
10550 {
10551 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10552 STAM_REL_STATS({cThreadedCalls++;});
10553 }
10554 Assert(off <= pReNative->cInstrBufAlloc);
10555 Assert(pReNative->cCondDepth == 0);
10556
10557#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10558 if (LogIs2Enabled())
10559 {
10560 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10561# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10562 static const char s_achState[] = "CUXI";
10563# else
10564 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10565# endif
10566
10567 char szGpr[17];
10568 for (unsigned i = 0; i < 16; i++)
10569 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10570 szGpr[16] = '\0';
10571
10572 char szSegBase[X86_SREG_COUNT + 1];
10573 char szSegLimit[X86_SREG_COUNT + 1];
10574 char szSegAttrib[X86_SREG_COUNT + 1];
10575 char szSegSel[X86_SREG_COUNT + 1];
10576 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10577 {
10578 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10579 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10580 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10581 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10582 }
10583 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10584 = szSegSel[X86_SREG_COUNT] = '\0';
10585
10586 char szEFlags[8];
10587 for (unsigned i = 0; i < 7; i++)
10588 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10589 szEFlags[7] = '\0';
10590
10591 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10592 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10593 }
10594#endif
10595
10596 /*
10597 * Advance.
10598 */
10599 pCallEntry++;
10600#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10601 idxCurCall++;
10602#endif
10603 }
10604
10605 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10606 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10607 if (!cThreadedCalls)
10608 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10609
10610 /*
10611 * Emit the epilog code.
10612 */
10613 uint32_t idxReturnLabel;
10614 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10615
10616 /*
10617 * Generate special jump labels.
10618 */
10619 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10620 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10621 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10622 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10623
10624 /*
10625 * Generate simple TB tail labels that just call a helper with a pVCpu
10626 * arg and either return or longjmp/throw a non-zero status.
10627 *
10628 * The array entries must be ordered by enmLabel value so we can index
10629 * using fTailLabels bit numbers.
10630 */
10631 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10632 static struct
10633 {
10634 IEMNATIVELABELTYPE enmLabel;
10635 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10636 } const g_aSimpleTailLabels[] =
10637 {
10638 { kIemNativeLabelType_Invalid, NULL },
10639 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10640 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10641 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10642 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10643 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10644 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10645 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10646 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10647 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10648 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10649 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10650 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10651 };
10652 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10653 AssertCompile(kIemNativeLabelType_Invalid == 0);
10654 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
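 /* RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U sets bits 1..LastSimple, deliberately
    leaving out bit 0 (kIemNativeLabelType_Invalid); ASMBitFirstSetU64 below returns a 1-based
    bit index, hence the - 1U when converting back to a label type. */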
10655 if (fTailLabels)
10656 {
10657 do
10658 {
10659 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10660 fTailLabels &= ~RT_BIT_64(enmLabel);
10661 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10662
10663 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10664 Assert(idxLabel != UINT32_MAX);
10665 if (idxLabel != UINT32_MAX)
10666 {
10667 iemNativeLabelDefine(pReNative, idxLabel, off);
10668
10669 /* int pfnCallback(PVMCPUCC pVCpu) */
10670 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10671 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10672
10673 /* jump back to the return sequence. */
10674 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10675 }
10676
10677 } while (fTailLabels);
10678 }
10679 }
10680 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10681 {
10682 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10683 return pTb;
10684 }
10685 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10686 Assert(off <= pReNative->cInstrBufAlloc);
10687
10688 /*
10689 * Make sure all labels have been defined.
10690 */
10691 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10692#ifdef VBOX_STRICT
10693 uint32_t const cLabels = pReNative->cLabels;
10694 for (uint32_t i = 0; i < cLabels; i++)
10695 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10696#endif
10697
10698 /*
10699 * Allocate executable memory, copy over the code we've generated.
10700 */
10701 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10702 if (pTbAllocator->pDelayedFreeHead)
10703 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10704
10705 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
10706 AssertReturn(paFinalInstrBuf, pTb);
10707 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10708
10709 /*
10710 * Apply fixups.
10711 */
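 /* All fixup types use the same arithmetic: stored value = label offset - fixup offset + offAddend,
    with offsets being indices into the native instruction buffer. Illustrative (made-up numbers):
    a label at offset 0x40 referenced from a fixup at 0x20 with offAddend -4 stores 0x1c. */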
10712 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10713 uint32_t const cFixups = pReNative->cFixups;
10714 for (uint32_t i = 0; i < cFixups; i++)
10715 {
10716 Assert(paFixups[i].off < off);
10717 Assert(paFixups[i].idxLabel < cLabels);
10718 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10719 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10720 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10721 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10722 switch (paFixups[i].enmType)
10723 {
10724#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10725 case kIemNativeFixupType_Rel32:
10726 Assert(paFixups[i].off + 4 <= off);
10727 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10728 continue;
10729
10730#elif defined(RT_ARCH_ARM64)
10731 case kIemNativeFixupType_RelImm26At0:
10732 {
10733 Assert(paFixups[i].off < off);
10734 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10735 Assert(offDisp >= -262144 && offDisp < 262144);
10736 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10737 continue;
10738 }
10739
10740 case kIemNativeFixupType_RelImm19At5:
10741 {
10742 Assert(paFixups[i].off < off);
10743 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10744 Assert(offDisp >= -262144 && offDisp < 262144);
10745 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10746 continue;
10747 }
10748
10749 case kIemNativeFixupType_RelImm14At5:
10750 {
10751 Assert(paFixups[i].off < off);
10752 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10753 Assert(offDisp >= -8192 && offDisp < 8192);
10754 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10755 continue;
10756 }
10757
10758#endif
10759 case kIemNativeFixupType_Invalid:
10760 case kIemNativeFixupType_End:
10761 break;
10762 }
10763 AssertFailed();
10764 }
10765
10766 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
10767 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10768
10769 /*
10770 * Convert the translation block.
10771 */
10772 RTMemFree(pTb->Thrd.paCalls);
10773 pTb->Native.paInstructions = paFinalInstrBuf;
10774 pTb->Native.cInstructions = off;
10775 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10776#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10777 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10778 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10779#endif
10780
10781 Assert(pTbAllocator->cThreadedTbs > 0);
10782 pTbAllocator->cThreadedTbs -= 1;
10783 pTbAllocator->cNativeTbs += 1;
10784 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10785
10786#ifdef LOG_ENABLED
10787 /*
10788 * Disassemble to the log if enabled.
10789 */
10790 if (LogIs3Enabled())
10791 {
10792 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10793 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10794# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10795 RTLogFlush(NULL);
10796# endif
10797 }
10798#endif
10799 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10800
10801 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10802 return pTb;
10803}
10804