VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103973

Last change on this file since 103973 was 103964, checked in by vboxsync, 13 months ago

VMM/IEM: Some statistics on the SIMD register allocator and some fixes, bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 427.7 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103964 2024-03-20 15:01:46Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details of calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down the configs here to avoid wasting time on unused ones.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store its internal
146 * bookkeeping data in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
161
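/* Editor's note: an illustrative sketch, not part of the original source. It just
 * shows how a request size is rounded up to whole sub-allocation units with the
 * two defines above; the helper name is made up for the example. */
#if 0 /* example sketch */
static uint32_t iemExecMemExampleReqToUnits(uint32_t cbReq)
{
    /* Round up to whole 128 byte units and convert bytes to a unit count:
       cbReq=1 -> 1 unit (128 bytes); cbReq=129 -> 2 units (256 bytes). */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif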
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
263/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. For
338 * simplicity, the bitmaps for all chunks are allocated as one contiguous
339 * block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside within 32-bit RVA distance of the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
461
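/* Editor's note: an illustrative sketch, not part of the original source. It mirrors
 * the first-fit strategy of iemExecMemAllocatorAllocInChunkInt above on a plain
 * 0=free/1=allocated uint64_t bitmap, without the IPRT bit-scan helpers; the helper
 * name is made up for the example. */
#if 0 /* example sketch */
static int32_t iemExecMemExampleFindFreeRun(uint64_t const *pbmAlloc, uint32_t cBits, uint32_t cReqUnits)
{
    for (uint32_t iBit = 0; iBit + cReqUnits <= cBits; iBit++)
    {
        /* Count clear (free) bits starting at iBit, stopping once we have enough. */
        uint32_t cFree = 0;
        while (   cFree < cReqUnits
               && !(pbmAlloc[(iBit + cFree) / 64] & RT_BIT_64((iBit + cFree) % 64)))
            cFree++;
        if (cFree >= cReqUnits)
            return (int32_t)iBit;   /* start of a sufficiently large free run */
        iBit += cFree;              /* skip ahead past the set (allocated) bit we hit */
    }
    return -1;                      /* no free run of cReqUnits bits in this chunk */
}
#endif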
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
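/* Editor's note: an illustrative sketch, not part of the original source. It shows the
 * intended allocate/write/ready/free sequence for the three functions above; the darwin
 * W^X handling makes the iemExecMemAllocatorReadyForUse step mandatory before executing
 * the code. The buffer parameters are made up for the example (the allocator asserts
 * that requests are larger than 32 bytes and smaller than 512 KB). */
#if 0 /* example sketch */
static void iemExecMemExampleEmit(PVMCPUCC pVCpu, uint8_t const *pbCode, uint32_t cbCode)
{
    void *pvExec = iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pvExec)
    {
        memcpy(pvExec, pbCode, cbCode);                        /* pages are read+write here */
        iemExecMemAllocatorReadyForUse(pVCpu, pvExec, cbCode); /* flip to read+exec, flush icache */
        /* ... the caller executes the code, and eventually: */
        iemExecMemAllocatorFree(pVCpu, pvExec, cbCode);        /* return the units to the chunk */
    }
}
#endif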
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! We use a frame register approach here both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
812
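/* Editor's note: an illustrative sketch, not part of the original source. Worked
 * encodings for the two emitters above: ULEB128(0x90) is the two bytes 0x90 0x01
 * (low 7 bits first with the continuation bit set), and signed LEB128(-8) is the
 * single byte 0x78 (-8 & 0x3f, plus the 0x40 sign bit). */
#if 0 /* example sketch */
static void iemDwarfExampleLeb128(void)
{
    uint8_t abBuf[8];
    RTPTRUNION Ptr = { abBuf };

    Ptr = iemDwarfPutUleb128(Ptr, 0x90);
    Assert(abBuf[0] == 0x90 && abBuf[1] == 0x01);

    Ptr.pb = abBuf;
    Ptr = iemDwarfPutLeb128(Ptr, -8);
    Assert(abBuf[0] == 0x78);
}
#endif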
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
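/* Editor's note: an illustrative sketch, not part of the original source. It shows the
 * raw bytes the two CFA emitters above produce for a classic RBP frame, assuming the
 * usual AMD64 DWARF register numbers (DWREG_AMD64_RBP=6, return address column=16) and
 * the data alignment factor of -8 used by the CIE generated below. */
#if 0 /* example sketch */
static void iemDwarfExampleCfa(void)
{
    uint8_t abBuf[8];
    RTPTRUNION Ptr = { abBuf };

    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* DW_CFA_def_cfa 6, 16 -> 0x0c 0x06 0x10 */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1);     /* DW_CFA_offset r16, 1 -> 0x90 0x01 (RA at CFA-8) */
    Assert(abBuf[0] == 0x0c && abBuf[1] == 0x06 && abBuf[2] == 0x10);
    Assert(abBuf[3] == 0x90 && abBuf[4] == 0x01);
}
#endif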
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on a 64 byte boundary, so the first time
1337 * through here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while (cbInitial > (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
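/*
 * Example (editor's illustrative sketch, not part of the build): a hypothetical ring-3
 * caller wanting about 64 MiB up front with room to grow to 512 MiB could call the
 * function above like this, passing 0 for cbChunk so the default (64 MiB here, since
 * cbMax >= 256 MiB) is used:
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, 512 * _1M, 64 * _1M, 0);
 *      AssertLogRelRCReturn(rc, rc);
 *
 * With these numbers the allocator ends up with cMaxChunks = 8 and a single chunk
 * allocated by the initial-allocation loop.
 */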
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadedFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695 /* We set fSafeToFree to false because we're being called in the context
1696 of a TB callback function, which for native TBs means we cannot release
1697 the executable memory until we've returned all the way back to iemTbExec,
1698 as that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
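/*
 * Note (editor's summary): the helpers in this and the following sections are the
 * out-of-line workers that recompiled TB code calls for guest memory accesses.  When
 * IEMNATIVE_WITH_TLB_LOOKUP_FETCH (or the corresponding _STORE / _PUSH / _POP /
 * _MAPPED define) is set, the emitted native code presumably performs the TLB lookup
 * inline and only calls these helpers on a TLB miss, hence the *SafeJmp workers;
 * otherwise the plain *Jmp workers handle the access entirely.
 */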
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
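/* Note (editor's illustration): the chained casts above sign-extend the fetched byte to
   the target width and then zero-extend it to 64 bits for the host register, e.g. a
   fetched 0x80 becomes (int8_t)-128 -> (uint16_t)0xff80 -> 0x000000000000ff80. */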
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1874/**
1875 * Used by TB code to load 128-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1880 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1881#else
1882 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1883#endif
1884}
1885
1886
1887/**
1888 * Used by TB code to load 128-bit data w/ segmentation.
1889 */
1890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1891{
1892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1893 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1894#else
1895 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1896#endif
1897}
1898
1899
1900/**
1901 * Used by TB code to load 128-bit data w/ segmentation.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1906 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1907#else
1908 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1909#endif
1910}
1911
1912
1913/**
1914 * Used by TB code to load 256-bit data w/ segmentation.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1917{
1918#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1919 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1920#else
1921 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1922#endif
1923}
1924
1925
1926/**
1927 * Used by TB code to load 256-bit data w/ segmentation.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1932 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1933#else
1934 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1935#endif
1936}
1937#endif
1938
1939
1940/**
1941 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1946 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1947#else
1948 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1949#endif
1950}
1951
1952
1953/**
1954 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1955 */
1956IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1957{
1958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1959 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1960#else
1961 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1962#endif
1963}
1964
1965
1966/**
1967 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1968 */
1969IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1970{
1971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1972 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1973#else
1974 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1975#endif
1976}
1977
1978
1979/**
1980 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1985 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1986#else
1987 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1988#endif
1989}
1990
1991
1992#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1993/**
1994 * Used by TB code to store unsigned 128-bit data w/ segmentation.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1999 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2000#else
2001 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to store unsigned 128-bit data w/ segmentation.
2008 */
2009IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2010{
2011#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2012 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2013#else
2014 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2015#endif
2016}
2017
2018
2019/**
2020 * Used by TB code to store unsigned 256-bit data w/ segmentation.
2021 */
2022IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2023{
2024#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2025 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2026#else
2027 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2028#endif
2029}
2030
2031
2032/**
2033 * Used by TB code to store unsigned 256-bit data w/ segmentation.
2034 */
2035IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2036{
2037#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2038 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2039#else
2040 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2041#endif
2042}
2043#endif
2044
2045
2046
2047/**
2048 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2049 */
2050IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2051{
2052#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2053 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2054#else
2055 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2056#endif
2057}
2058
2059
2060/**
2061 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2066 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2067#else
2068 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to store a 32-bit selector value onto a generic stack.
2075 *
2076 * Intel CPUs don't write a whole dword, hence the special function.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2081 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2082#else
2083 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to push unsigned 64-bit value onto a generic stack.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2092{
2093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2094 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2095#else
2096 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2097#endif
2098}
2099
2100
2101/**
2102 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2103 */
2104IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2105{
2106#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2107 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2108#else
2109 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2110#endif
2111}
2112
2113
2114/**
2115 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2116 */
2117IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2118{
2119#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2120 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2121#else
2122 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2123#endif
2124}
2125
2126
2127/**
2128 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2129 */
2130IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2131{
2132#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2133 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2134#else
2135 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2136#endif
2137}
2138
2139
2140
2141/*********************************************************************************************************************************
2142* Helpers: Flat memory fetches and stores. *
2143*********************************************************************************************************************************/
2144
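/*
 * Note (editor's summary): the flat variants below pass UINT8_MAX as the segment
 * register index to the common *SafeJmp workers, which appears to be the convention
 * for "no segment register / flat addressing".
 */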
2145/**
2146 * Used by TB code to load unsigned 8-bit data w/ flat address.
2147 * @note Zero extending the value to 64-bit to simplify assembly.
2148 */
2149IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2150{
2151#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2152 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2153#else
2154 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2155#endif
2156}
2157
2158
2159/**
2160 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2161 * to 16 bits.
2162 * @note Zero extending the value to 64-bit to simplify assembly.
2163 */
2164IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2165{
2166#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2167 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2168#else
2169 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2170#endif
2171}
2172
2173
2174/**
2175 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2176 * to 32 bits.
2177 * @note Zero extending the value to 64-bit to simplify assembly.
2178 */
2179IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2180{
2181#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2182 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2183#else
2184 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2185#endif
2186}
2187
2188
2189/**
2190 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2191 * to 64 bits.
2192 */
2193IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2194{
2195#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2196 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2197#else
2198 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2199#endif
2200}
2201
2202
2203/**
2204 * Used by TB code to load unsigned 16-bit data w/ flat address.
2205 * @note Zero extending the value to 64-bit to simplify assembly.
2206 */
2207IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2208{
2209#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2210 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2211#else
2212 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2213#endif
2214}
2215
2216
2217/**
2218 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2219 * to 32 bits.
2220 * @note Zero extending the value to 64-bit to simplify assembly.
2221 */
2222IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2223{
2224#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2225 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2226#else
2227 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2228#endif
2229}
2230
2231
2232/**
2233 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2234 * to 64 bits.
2235 * @note Zero extending the value to 64-bit to simplify assembly.
2236 */
2237IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2238{
2239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2240 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2241#else
2242 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2243#endif
2244}
2245
2246
2247/**
2248 * Used by TB code to load unsigned 32-bit data w/ flat address.
2249 * @note Zero extending the value to 64-bit to simplify assembly.
2250 */
2251IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2252{
2253#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2254 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2255#else
2256 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2257#endif
2258}
2259
2260
2261/**
2262 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2263 * to 64 bits.
2264 * @note Zero extending the value to 64-bit to simplify assembly.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2269 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2270#else
2271 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276/**
2277 * Used by TB code to load unsigned 64-bit data w/ flat address.
2278 */
2279IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2280{
2281#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2282 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2283#else
2284 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2285#endif
2286}
2287
2288
2289#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2290/**
2291 * Used by TB code to load unsigned 128-bit data w/ flat address.
2292 */
2293IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2294{
2295#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2296 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2297#else
2298 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2299#endif
2300}
2301
2302
2303/**
2304 * Used by TB code to load unsigned 128-bit data w/ flat address.
2305 */
2306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2307{
2308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2309 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2310#else
2311 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2312#endif
2313}
2314
2315
2316/**
2317 * Used by TB code to load unsigned 128-bit data w/ flat address.
2318 */
2319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2320{
2321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2322 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2323#else
2324 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2325#endif
2326}
2327
2328
2329/**
2330 * Used by TB code to load unsigned 256-bit data w/ flat address.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2333{
2334#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2335 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2336#else
2337 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2338#endif
2339}
2340
2341
2342/**
2343 * Used by TB code to load unsigned 256-bit data w/ flat address.
2344 */
2345IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2346{
2347#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2348 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2349#else
2350 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2351#endif
2352}
2353#endif
2354
2355
2356/**
2357 * Used by TB code to store unsigned 8-bit data w/ flat address.
2358 */
2359IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2360{
2361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2362 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2363#else
2364 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2365#endif
2366}
2367
2368
2369/**
2370 * Used by TB code to store unsigned 16-bit data w/ flat address.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2373{
2374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2375 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2376#else
2377 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2378#endif
2379}
2380
2381
2382/**
2383 * Used by TB code to store unsigned 32-bit data w/ flat address.
2384 */
2385IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2388 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2389#else
2390 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to store unsigned 64-bit data w/ flat address.
2397 */
2398IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2399{
2400#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2401 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2402#else
2403 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2404#endif
2405}
2406
2407
2408#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2409/**
2410 * Used by TB code to store unsigned 128-bit data w/ flat address.
2411 */
2412IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2413{
2414#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2415 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2416#else
2417 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2418#endif
2419}
2420
2421
2422/**
2423 * Used by TB code to store unsigned 128-bit data w/ flat address.
2424 */
2425IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2426{
2427#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2428 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2429#else
2430 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2431#endif
2432}
2433
2434
2435/**
2436 * Used by TB code to store unsigned 256-bit data w/ flat address.
2437 */
2438IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2439{
2440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2441 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2442#else
2443 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2444#endif
2445}
2446
2447
2448/**
2449 * Used by TB code to store unsigned 256-bit data w/ flat address.
2450 */
2451IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2452{
2453#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2454 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2455#else
2456 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2457#endif
2458}
2459#endif
2460
2461
2462
2463/**
2464 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2465 */
2466IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2467{
2468#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2469 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2470#else
2471 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2472#endif
2473}
2474
2475
2476/**
2477 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2478 */
2479IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2480{
2481#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2482 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2483#else
2484 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2485#endif
2486}
2487
2488
2489/**
2490 * Used by TB code to store a segment selector value onto a flat stack.
2491 *
2492 * Intel CPUs don't write a whole dword, hence the special function.
2493 */
2494IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2495{
2496#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2497 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2498#else
2499 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2500#endif
2501}
2502
2503
2504/**
2505 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2506 */
2507IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2508{
2509#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2510 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2511#else
2512 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2513#endif
2514}
2515
2516
2517/**
2518 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2519 */
2520IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2521{
2522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2523 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2524#else
2525 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2526#endif
2527}
2528
2529
2530/**
2531 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2532 */
2533IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2534{
2535#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2536 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2537#else
2538 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2539#endif
2540}
2541
2542
2543/**
2544 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2545 */
2546IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2547{
2548#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2549 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2550#else
2551 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2552#endif
2553}
2554
2555
2556
2557/*********************************************************************************************************************************
2558* Helpers: Segmented memory mapping. *
2559*********************************************************************************************************************************/
2560
2561/**
2562 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2563 * segmentation.
2564 */
2565IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2566 RTGCPTR GCPtrMem, uint8_t iSegReg))
2567{
2568#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2569 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2570#else
2571 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2572#endif
2573}
2574
2575
2576/**
2577 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2578 */
2579IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2580 RTGCPTR GCPtrMem, uint8_t iSegReg))
2581{
2582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2583 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2584#else
2585 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2586#endif
2587}
2588
2589
2590/**
2591 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2592 */
2593IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2594 RTGCPTR GCPtrMem, uint8_t iSegReg))
2595{
2596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2597 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2598#else
2599 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2600#endif
2601}
2602
2603
2604/**
2605 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2606 */
2607IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2608 RTGCPTR GCPtrMem, uint8_t iSegReg))
2609{
2610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2611 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2612#else
2613 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2614#endif
2615}
2616
2617
2618/**
2619 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2620 * segmentation.
2621 */
2622IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2623 RTGCPTR GCPtrMem, uint8_t iSegReg))
2624{
2625#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2626 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2627#else
2628 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2629#endif
2630}
2631
2632
2633/**
2634 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2635 */
2636IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2637 RTGCPTR GCPtrMem, uint8_t iSegReg))
2638{
2639#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2640 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2641#else
2642 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2643#endif
2644}
2645
2646
2647/**
2648 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2649 */
2650IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2651 RTGCPTR GCPtrMem, uint8_t iSegReg))
2652{
2653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2654 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2655#else
2656 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2657#endif
2658}
2659
2660
2661/**
2662 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2663 */
2664IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2665 RTGCPTR GCPtrMem, uint8_t iSegReg))
2666{
2667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2668 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2669#else
2670 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2671#endif
2672}
2673
2674
2675/**
2676 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2677 * segmentation.
2678 */
2679IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2680 RTGCPTR GCPtrMem, uint8_t iSegReg))
2681{
2682#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2683 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2684#else
2685 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2686#endif
2687}
2688
2689
2690/**
2691 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2694 RTGCPTR GCPtrMem, uint8_t iSegReg))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2698#else
2699 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2706 */
2707IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2708 RTGCPTR GCPtrMem, uint8_t iSegReg))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2712#else
2713 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2722 RTGCPTR GCPtrMem, uint8_t iSegReg))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2726#else
2727 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2734 * segmentation.
2735 */
2736IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2737 RTGCPTR GCPtrMem, uint8_t iSegReg))
2738{
2739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2740 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2741#else
2742 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2743#endif
2744}
2745
2746
2747/**
2748 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2749 */
2750IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2751 RTGCPTR GCPtrMem, uint8_t iSegReg))
2752{
2753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2754 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2755#else
2756 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2757#endif
2758}
2759
2760
2761/**
2762 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2763 */
2764IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2765 RTGCPTR GCPtrMem, uint8_t iSegReg))
2766{
2767#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2768 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2769#else
2770 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2771#endif
2772}
2773
2774
2775/**
2776 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2777 */
2778IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2779 RTGCPTR GCPtrMem, uint8_t iSegReg))
2780{
2781#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2782 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2783#else
2784 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2785#endif
2786}
2787
2788
2789/**
2790 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2791 */
2792IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2793 RTGCPTR GCPtrMem, uint8_t iSegReg))
2794{
2795#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2796 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2797#else
2798 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2799#endif
2800}
2801
2802
2803/**
2804 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2805 */
2806IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2807 RTGCPTR GCPtrMem, uint8_t iSegReg))
2808{
2809#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2810 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2811#else
2812 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2813#endif
2814}
2815
2816
2817/**
2818 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2819 * segmentation.
2820 */
2821IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2822 RTGCPTR GCPtrMem, uint8_t iSegReg))
2823{
2824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2825 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2826#else
2827 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2828#endif
2829}
2830
2831
2832/**
2833 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2834 */
2835IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2836 RTGCPTR GCPtrMem, uint8_t iSegReg))
2837{
2838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2839 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2840#else
2841 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2842#endif
2843}
2844
2845
2846/**
2847 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2848 */
2849IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2850 RTGCPTR GCPtrMem, uint8_t iSegReg))
2851{
2852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2853 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2854#else
2855 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2856#endif
2857}
2858
2859
2860/**
2861 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2862 */
2863IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2864 RTGCPTR GCPtrMem, uint8_t iSegReg))
2865{
2866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2867 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2868#else
2869 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2870#endif
2871}
2872
2873
2874/*********************************************************************************************************************************
2875* Helpers: Flat memory mapping. *
2876*********************************************************************************************************************************/
2877
2878/**
2879 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2880 * address.
2881 */
2882IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2883{
2884#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2885 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2886#else
2887 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2888#endif
2889}
2890
2891
2892/**
2893 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2896{
2897#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2898 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2899#else
2900 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2901#endif
2902}
2903
2904
2905/**
2906 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2907 */
2908IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2909{
2910#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2911 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2912#else
2913 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2914#endif
2915}
2916
2917
2918/**
2919 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2920 */
2921IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2922{
2923#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2924 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2925#else
2926 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2927#endif
2928}
2929
2930
2931/**
2932 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2933 * address.
2934 */
2935IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2936{
2937#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2938 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2939#else
2940 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2941#endif
2942}
2943
2944
2945/**
2946 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2947 */
2948IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2949{
2950#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2951 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2952#else
2953 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2954#endif
2955}
2956
2957
2958/**
2959 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2960 */
2961IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2962{
2963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2964 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2965#else
2966 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2967#endif
2968}
2969
2970
2971/**
2972 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2973 */
2974IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2975{
2976#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2977 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2978#else
2979 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2980#endif
2981}
2982
2983
2984/**
2985 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2986 * address.
2987 */
2988IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2989{
2990#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2991 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2992#else
2993 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2994#endif
2995}
2996
2997
2998/**
2999 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
3000 */
3001IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3002{
3003#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3004 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3005#else
3006 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3007#endif
3008}
3009
3010
3011/**
3012 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
3013 */
3014IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3015{
3016#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3017 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3018#else
3019 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3020#endif
3021}
3022
3023
3024/**
3025 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
3026 */
3027IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3028{
3029#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3030 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3031#else
3032 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3033#endif
3034}
3035
3036
3037/**
3038 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
3039 * address.
3040 */
3041IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3042{
3043#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3044 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3045#else
3046 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3047#endif
3048}
3049
3050
3051/**
3052 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3053 */
3054IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3055{
3056#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3057 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3058#else
3059 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3060#endif
3061}
3062
3063
3064/**
3065 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3066 */
3067IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3068{
3069#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3070 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3071#else
3072 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3073#endif
3074}
3075
3076
3077/**
3078 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3079 */
3080IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3081{
3082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3083 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3084#else
3085 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3086#endif
3087}
3088
3089
3090/**
3091 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3092 */
3093IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3094{
3095#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3096 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3097#else
3098 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3099#endif
3100}
3101
3102
3103/**
3104 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3105 */
3106IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3107{
3108#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3109 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3110#else
3111 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3112#endif
3113}
3114
3115
3116/**
3117 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3118 * address.
3119 */
3120IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3121{
3122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3123 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3124#else
3125 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3126#endif
3127}
3128
3129
3130/**
3131 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3132 */
3133IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3134{
3135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3136 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3137#else
3138 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3139#endif
3140}
3141
3142
3143/**
3144 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3145 */
3146IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3147{
3148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3149 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3150#else
3151 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3152#endif
3153}
3154
3155
3156/**
3157 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3158 */
3159IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3160{
3161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3162 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3163#else
3164 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3165#endif
3166}
3167
3168
3169/*********************************************************************************************************************************
3170* Helpers: Commit, rollback & unmap *
3171*********************************************************************************************************************************/
3172
3173/**
3174 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3175 */
3176IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3177{
3178 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3179}
3180
3181
3182/**
3183 * Used by TB code to commit and unmap a read-write memory mapping.
3184 */
3185IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3186{
3187 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3188}
3189
3190
3191/**
3192 * Used by TB code to commit and unmap a write-only memory mapping.
3193 */
3194IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3195{
3196 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3197}
3198
3199
3200/**
3201 * Used by TB code to commit and unmap a read-only memory mapping.
3202 */
3203IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3204{
3205 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3206}
3207
3208
3209/**
3210 * Reinitializes the native recompiler state.
3211 *
3212 * Called before starting a new recompile job.
3213 */
3214static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3215{
3216 pReNative->cLabels = 0;
3217 pReNative->bmLabelTypes = 0;
3218 pReNative->cFixups = 0;
3219#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3220 pReNative->pDbgInfo->cEntries = 0;
3221#endif
3222 pReNative->pTbOrg = pTb;
3223 pReNative->cCondDepth = 0;
3224 pReNative->uCondSeqNo = 0;
3225 pReNative->uCheckIrqSeqNo = 0;
3226 pReNative->uTlbSeqNo = 0;
3227
3228#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3229 pReNative->Core.offPc = 0;
3230 pReNative->Core.cInstrPcUpdateSkipped = 0;
3231#endif
3232#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3233 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3234#endif
3235 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3236#if IEMNATIVE_HST_GREG_COUNT < 32
3237 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3238#endif
3239 ;
3240 pReNative->Core.bmHstRegsWithGstShadow = 0;
3241 pReNative->Core.bmGstRegShadows = 0;
3242 pReNative->Core.bmVars = 0;
3243 pReNative->Core.bmStack = 0;
3244 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3245 pReNative->Core.u64ArgVars = UINT64_MAX;
3246
3247 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3248 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3249 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3250 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3251 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3252 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3253 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3254 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3255 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3256 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3257 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3258 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3259 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3260 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3261 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3262 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3263 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3264
3265 /* Full host register reinit: */
3266 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3267 {
3268 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3269 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3270 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3271 }
3272
3273 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3274 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3275#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3276 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3277#endif
3278#ifdef IEMNATIVE_REG_FIXED_TMP0
3279 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3280#endif
3281#ifdef IEMNATIVE_REG_FIXED_TMP1
3282 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3283#endif
3284#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3285 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3286#endif
3287 );
3288 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3289 {
3290 fRegs &= ~RT_BIT_32(idxReg);
3291 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3292 }
3293
3294 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3295#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3296 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3297#endif
3298#ifdef IEMNATIVE_REG_FIXED_TMP0
3299 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3300#endif
3301#ifdef IEMNATIVE_REG_FIXED_TMP1
3302 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3303#endif
3304#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3305 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3306#endif
3307
3308#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3309# ifdef RT_ARCH_ARM64
3310 /*
3311 * Arm64 has only 32 128-bit registers. In order to support emulating 256-bit registers we statically pair
3312 * two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3313 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register of each pair as fixed here during init,
3314 * and the register allocator assumes that it will always be free when the lower one is picked.
3315 */
3316 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3317# else
3318 uint32_t const fFixedAdditional = 0;
3319# endif
3320
3321 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3322 | fFixedAdditional
3323# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3324 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3325# endif
3326 ;
3327 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3328 pReNative->Core.bmGstSimdRegShadows = 0;
3329 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3330 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3331
3332 /* Full host register reinit: */
3333 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3334 {
3335 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3336 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3337 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3338 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3339 }
3340
3341 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3342 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3343 {
3344 fRegs &= ~RT_BIT_32(idxReg);
3345 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3346 }
3347
3348#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3349 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3350#endif
3351
3352#endif
3353
3354 return pReNative;
3355}
3356
3357
3358/**
3359 * Allocates and initializes the native recompiler state.
3360 *
3361 * This is called the first time an EMT wants to recompile something.
3362 *
3363 * @returns Pointer to the new recompiler state.
3364 * @param pVCpu The cross context virtual CPU structure of the calling
3365 * thread.
3366 * @param pTb The TB that's about to be recompiled.
3367 * @thread EMT(pVCpu)
3368 */
3369static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3370{
3371 VMCPU_ASSERT_EMT(pVCpu);
3372
3373 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3374 AssertReturn(pReNative, NULL);
3375
3376 /*
3377 * Try to allocate all the buffers and arrays we need.
3378 */
3379 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3380 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3381 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3382#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3383 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3384#endif
3385 if (RT_LIKELY( pReNative->pInstrBuf
3386 && pReNative->paLabels
3387 && pReNative->paFixups)
3388#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3389 && pReNative->pDbgInfo
3390#endif
3391 )
3392 {
3393 /*
3394 * Set the buffer & array sizes on success.
3395 */
3396 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3397 pReNative->cLabelsAlloc = _8K;
3398 pReNative->cFixupsAlloc = _16K;
3399#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3400 pReNative->cDbgInfoAlloc = _16K;
3401#endif
3402
3403 /* Other constant stuff: */
3404 pReNative->pVCpu = pVCpu;
3405
3406 /*
3407 * Done, just need to save it and reinit it.
3408 */
3409 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3410 return iemNativeReInit(pReNative, pTb);
3411 }
3412
3413 /*
3414 * Failed. Cleanup and return.
3415 */
3416 AssertFailed();
3417 RTMemFree(pReNative->pInstrBuf);
3418 RTMemFree(pReNative->paLabels);
3419 RTMemFree(pReNative->paFixups);
3420#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3421 RTMemFree(pReNative->pDbgInfo);
3422#endif
3423 RTMemFree(pReNative);
3424 return NULL;
3425}
3426
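/*
 * Illustrative sketch (not part of the original source): the two functions
 * above are typically paired by the recompiler entry point, which reuses the
 * per-EMT state when it already exists and only allocates it the first time
 * around.  Roughly (the surrounding control flow is an assumption):
 *
 * @code
 *     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *     if (RT_LIKELY(pReNative))
 *         pReNative = iemNativeReInit(pReNative, pTb);   // cheap per-TB reset
 *     else
 *         pReNative = iemNativeInit(pVCpu, pTb);         // first-time allocation
 * @endcode
 */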
3427
3428/**
3429 * Creates a label.
3430 *
3431 * If the label does not yet have a defined position,
3432 * call iemNativeLabelDefine() later to set it.
3433 *
3434 * @returns Label ID. Throws VBox status code on failure, so no need to check
3435 * the return value.
3436 * @param pReNative The native recompile state.
3437 * @param enmType The label type.
3438 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3439 * label is not yet defined (default).
3440 * @param uData Data associated with the label. Only applicable to
3441 * certain types of labels. Default is zero.
3442 */
3443DECL_HIDDEN_THROW(uint32_t)
3444iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3445 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3446{
3447 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3448
3449 /*
3450 * Locate existing label definition.
3451 *
3452 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3453 * and uData is zero.
3454 */
3455 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3456 uint32_t const cLabels = pReNative->cLabels;
3457 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3458#ifndef VBOX_STRICT
3459 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3460 && offWhere == UINT32_MAX
3461 && uData == 0
3462#endif
3463 )
3464 {
3465#ifndef VBOX_STRICT
3466 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3467 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3468 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3469 if (idxLabel < pReNative->cLabels)
3470 return idxLabel;
3471#else
3472 for (uint32_t i = 0; i < cLabels; i++)
3473 if ( paLabels[i].enmType == enmType
3474 && paLabels[i].uData == uData)
3475 {
3476 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3477 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3478 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3479 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3480 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3481 return i;
3482 }
3483 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3484 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3485#endif
3486 }
3487
3488 /*
3489 * Make sure we've got room for another label.
3490 */
3491 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3492 { /* likely */ }
3493 else
3494 {
3495 uint32_t cNew = pReNative->cLabelsAlloc;
3496 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3497 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3498 cNew *= 2;
3499 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
3500 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3501 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3502 pReNative->paLabels = paLabels;
3503 pReNative->cLabelsAlloc = cNew;
3504 }
3505
3506 /*
3507 * Define a new label.
3508 */
3509 paLabels[cLabels].off = offWhere;
3510 paLabels[cLabels].enmType = enmType;
3511 paLabels[cLabels].uData = uData;
3512 pReNative->cLabels = cLabels + 1;
3513
3514 Assert((unsigned)enmType < 64);
3515 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3516
3517 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3518 {
3519 Assert(uData == 0);
3520 pReNative->aidxUniqueLabels[enmType] = cLabels;
3521 }
3522
3523 if (offWhere != UINT32_MAX)
3524 {
3525#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3526 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3527 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3528#endif
3529 }
3530 return cLabels;
3531}
3532
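/*
 * Illustrative sketch (not part of the original source): a label is usually
 * created as a forward declaration first (offWhere left at UINT32_MAX) and
 * given its position later once the target code has been emitted.  The label
 * type name used here is purely a placeholder:
 *
 * @code
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType); // forward declaration
 *     // ... emit code that branches to the label via a fixup ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);    // 'off' = current code buffer offset
 * @endcode
 */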
3533
3534/**
3535 * Defines the location of an existing label.
3536 *
3537 * @param pReNative The native recompile state.
3538 * @param idxLabel The label to define.
3539 * @param offWhere The position.
3540 */
3541DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3542{
3543 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3544 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3545 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3546 pLabel->off = offWhere;
3547#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3548 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3549 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3550#endif
3551}
3552
3553
3554/**
3555 * Looks up a label.
3556 *
3557 * @returns Label ID if found, UINT32_MAX if not.
3558 */
3559static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3560 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3561{
3562 Assert((unsigned)enmType < 64);
3563 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3564 {
3565 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3566 return pReNative->aidxUniqueLabels[enmType];
3567
3568 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3569 uint32_t const cLabels = pReNative->cLabels;
3570 for (uint32_t i = 0; i < cLabels; i++)
3571 if ( paLabels[i].enmType == enmType
3572 && paLabels[i].uData == uData
3573 && ( paLabels[i].off == offWhere
3574 || offWhere == UINT32_MAX
3575 || paLabels[i].off == UINT32_MAX))
3576 return i;
3577 }
3578 return UINT32_MAX;
3579}
3580
3581
3582/**
3583 * Adds a fixup.
3584 *
3585 * @throws VBox status code (int) on failure.
3586 * @param pReNative The native recompile state.
3587 * @param offWhere The instruction offset of the fixup location.
3588 * @param idxLabel The target label ID for the fixup.
3589 * @param enmType The fixup type.
3590 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3591 */
3592DECL_HIDDEN_THROW(void)
3593iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3594 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3595{
3596 Assert(idxLabel <= UINT16_MAX);
3597 Assert((unsigned)enmType <= UINT8_MAX);
3598#ifdef RT_ARCH_ARM64
3599 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3600 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3601 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3602#endif
3603
3604 /*
3605 * Make sure we've got room for another fixup.
3606 */
3607 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3608 uint32_t const cFixups = pReNative->cFixups;
3609 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3610 { /* likely */ }
3611 else
3612 {
3613 uint32_t cNew = pReNative->cFixupsAlloc;
3614 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3615 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3616 cNew *= 2;
3617 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3618 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3619 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3620 pReNative->paFixups = paFixups;
3621 pReNative->cFixupsAlloc = cNew;
3622 }
3623
3624 /*
3625 * Add the fixup.
3626 */
3627 paFixups[cFixups].off = offWhere;
3628 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3629 paFixups[cFixups].enmType = enmType;
3630 paFixups[cFixups].offAddend = offAddend;
3631 pReNative->cFixups = cFixups + 1;
3632}
3633
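/*
 * Illustrative sketch (not part of the original source): a fixup records that
 * the instruction just emitted at 'off' must be patched to reach 'idxLabel'
 * once all code has been generated.  The fixup type below is a placeholder;
 * the real types are architecture specific (e.g. kIemNativeFixupType_RelImm14At5
 * on ARM64, as checked above):
 *
 * @code
 *     iemNativeAddFixup(pReNative, off, idxLabel, enmSomeFixupType, 0 \/\/ offAddend);
 * @endcode
 */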
3634
3635/**
3636 * Slow code path for iemNativeInstrBufEnsure.
3637 */
3638DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3639{
3640 /* Double the buffer size till we meet the request. */
3641 uint32_t cNew = pReNative->cInstrBufAlloc;
3642 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3643 do
3644 cNew *= 2;
3645 while (cNew < off + cInstrReq);
3646
3647 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3648#ifdef RT_ARCH_ARM64
3649 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3650#else
3651 uint32_t const cbMaxInstrBuf = _2M;
3652#endif
3653 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3654
3655 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3656 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3657
3658#ifdef VBOX_STRICT
3659 pReNative->offInstrBufChecked = off + cInstrReq;
3660#endif
3661 pReNative->cInstrBufAlloc = cNew;
3662 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3663}
3664
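/*
 * Illustrative sketch (not part of the original source): emitters do not call
 * the slow path above directly; they go through the iemNativeInstrBufEnsure()
 * fast path, which only falls back here when the buffer is too small.  The
 * fast-path signature is assumed to match the slow path:
 *
 * @code
 *     PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, cInstrReq);
 *     pCodeBuf[off++] = uFirstInstrUnit;   // emit up to cInstrReq instruction units
 * @endcode
 */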
3665#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3666
3667/**
3668 * Grows the static debug info array used during recompilation.
3669 *
3670 * @returns Pointer to the new debug info block; throws VBox status code on
3671 * failure, so no need to check the return value.
3672 */
3673DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3674{
3675 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3676 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3677 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3678 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3679 pReNative->pDbgInfo = pDbgInfo;
3680 pReNative->cDbgInfoAlloc = cNew;
3681 return pDbgInfo;
3682}
3683
3684
3685/**
3686 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3687 */
3688DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3689{
3690 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3691 { /* likely */ }
3692 else
3693 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3694 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3695}
3696
3697
3698/**
3699 * Debug Info: Adds a native offset record, if necessary.
3700 */
3701DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3702{
3703 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3704
3705 /*
3706 * Search backwards to see if we've got a similar record already.
3707 */
3708 uint32_t idx = pDbgInfo->cEntries;
3709 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3710 while (idx-- > idxStop)
3711 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3712 {
3713 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3714 return;
3715 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3716 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3717 break;
3718 }
3719
3720 /*
3721 * Add it.
3722 */
3723 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3724 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3725 pEntry->NativeOffset.offNative = off;
3726}
3727
3728
3729/**
3730 * Debug Info: Record info about a label.
3731 */
3732static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3733{
3734 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3735 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3736 pEntry->Label.uUnused = 0;
3737 pEntry->Label.enmLabel = (uint8_t)enmType;
3738 pEntry->Label.uData = uData;
3739}
3740
3741
3742/**
3743 * Debug Info: Record info about a threaded call.
3744 */
3745static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3746{
3747 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3748 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3749 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3750 pEntry->ThreadedCall.uUnused = 0;
3751 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3752}
3753
3754
3755/**
3756 * Debug Info: Record info about a new guest instruction.
3757 */
3758static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3759{
3760 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3761 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3762 pEntry->GuestInstruction.uUnused = 0;
3763 pEntry->GuestInstruction.fExec = fExec;
3764}
3765
3766
3767/**
3768 * Debug Info: Record info about guest register shadowing.
3769 */
3770DECL_HIDDEN_THROW(void)
3771iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3772 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3773{
3774 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3775 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3776 pEntry->GuestRegShadowing.uUnused = 0;
3777 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3778 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3779 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3780}
3781
3782
3783# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3784/**
3785 * Debug Info: Record info about guest SIMD register shadowing.
3786 */
3787DECL_HIDDEN_THROW(void)
3788iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3789 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3790{
3791 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3792 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3793 pEntry->GuestSimdRegShadowing.uUnused = 0;
3794 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3795 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3796 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3797}
3798# endif
3799
3800
3801# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3802/**
3803 * Debug Info: Record info about delayed RIP updates.
3804 */
3805DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3806{
3807 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3808 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3809 pEntry->DelayedPcUpdate.offPc = offPc;
3810 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3811}
3812# endif
3813
3814#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3815
3816
3817/*********************************************************************************************************************************
3818* Register Allocator *
3819*********************************************************************************************************************************/
3820
3821/**
3822 * Register parameter indexes (indexed by argument number).
3823 */
3824DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3825{
3826 IEMNATIVE_CALL_ARG0_GREG,
3827 IEMNATIVE_CALL_ARG1_GREG,
3828 IEMNATIVE_CALL_ARG2_GREG,
3829 IEMNATIVE_CALL_ARG3_GREG,
3830#if defined(IEMNATIVE_CALL_ARG4_GREG)
3831 IEMNATIVE_CALL_ARG4_GREG,
3832# if defined(IEMNATIVE_CALL_ARG5_GREG)
3833 IEMNATIVE_CALL_ARG5_GREG,
3834# if defined(IEMNATIVE_CALL_ARG6_GREG)
3835 IEMNATIVE_CALL_ARG6_GREG,
3836# if defined(IEMNATIVE_CALL_ARG7_GREG)
3837 IEMNATIVE_CALL_ARG7_GREG,
3838# endif
3839# endif
3840# endif
3841#endif
3842};
3843AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3844
3845/**
3846 * Call register masks indexed by argument count.
3847 */
3848DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3849{
3850 0,
3851 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3852 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3853 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3854 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3855 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3856#if defined(IEMNATIVE_CALL_ARG4_GREG)
3857 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3858 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3859# if defined(IEMNATIVE_CALL_ARG5_GREG)
3860 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3861 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3862# if defined(IEMNATIVE_CALL_ARG6_GREG)
3863 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3864 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3865 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3866# if defined(IEMNATIVE_CALL_ARG7_GREG)
3867 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3868 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3869 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3870# endif
3871# endif
3872# endif
3873#endif
3874};
3875
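/*
 * Illustrative sketch (not part of the original source): the two tables above
 * let generic code translate "argument number N" into a concrete host register
 * and build the set of registers a helper call will use for its arguments:
 *
 * @code
 *     uint8_t const  idxArgReg = g_aidxIemNativeCallRegs[iArg];   // host register for argument iArg
 *     uint32_t const fArgRegs  = g_afIemNativeCallRegs[cArgs];    // all argument registers of a cArgs call
 * @endcode
 */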
3876#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3877/**
3878 * BP offset of the stack argument slots.
3879 *
3880 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3881 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3882 */
3883DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3884{
3885 IEMNATIVE_FP_OFF_STACK_ARG0,
3886# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3887 IEMNATIVE_FP_OFF_STACK_ARG1,
3888# endif
3889# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3890 IEMNATIVE_FP_OFF_STACK_ARG2,
3891# endif
3892# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3893 IEMNATIVE_FP_OFF_STACK_ARG3,
3894# endif
3895};
3896AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3897#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3898
3899/**
3900 * Info about shadowed guest register values.
3901 * @see IEMNATIVEGSTREG
3902 */
3903DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3904{
3905#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3906 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3907 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3908 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3909 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3910 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3911 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3912 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3913 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3914 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3915 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3916 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3917 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3918 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3919 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3920 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3921 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3922 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3923 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3924 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3925 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3926 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3927 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3928 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3929 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3930 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3931 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3932 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3933 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3934 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3935 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3936 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3937 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3938 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3939 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3940 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3941 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3942 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3943 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3944 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3945 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3946 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3947 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3948 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3949 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3950 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3951 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3952 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3953 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3954#undef CPUMCTX_OFF_AND_SIZE
3955};
3956AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3957
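/*
 * Illustrative sketch (not part of the original source): each entry gives the
 * offset and size of the shadowed guest register within VMCPU plus a name for
 * logging, so a load of a shadow copy can be emitted generically.  The 'off'
 * field name and the emitter used below are assumptions for illustration:
 *
 * @code
 *     off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
 *     Log12(("loaded guest %s into %s\n", g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
 * @endcode
 */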
3958
3959/** Host CPU general purpose register names. */
3960DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3961{
3962#ifdef RT_ARCH_AMD64
3963 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3964#elif defined(RT_ARCH_ARM64)
3965 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3966 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3967#else
3968# error "port me"
3969#endif
3970};
3971
3972
3973#if 0 /* unused */
3974/**
3975 * Tries to locate a suitable register in the given register mask.
3976 *
3977 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3978 * failed.
3979 *
3980 * @returns Host register number on success, returns UINT8_MAX on failure.
3981 */
3982static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3983{
3984 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3985 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3986 if (fRegs)
3987 {
3988 /** @todo pick better here: */
3989 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3990
3991 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3992 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3993 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3994 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3995
3996 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3997 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3998 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3999 return idxReg;
4000 }
4001 return UINT8_MAX;
4002}
4003#endif /* unused */
4004
4005
4006/**
4007 * Locate a register, possibly freeing one up.
4008 *
4009 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4010 * failed.
4011 *
4012 * @returns Host register number on success. Returns UINT8_MAX if no registers
4013 * found; the caller is supposed to deal with this and raise an
4014 * allocation type specific status code (if desired).
4015 *
4016 * @throws VBox status code if we run into trouble spilling a variable or
4017 * recording debug info. Does NOT throw anything if we're out of
4018 * registers, though.
4019 */
4020static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4021 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4022{
4023 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4024 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4025 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4026
4027 /*
4028 * Try a register that's free but currently shadowing a guest register.
4029 */
4030 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4031 if (fRegs)
4032 {
4033 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4034
4035#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4036 /*
4037 * When we have liveness information, we use it to kick out all shadowed
4038 * guest registers that will not be needed any more in this TB. If we're
4039 * lucky, this may prevent us from ending up here again.
4040 *
4041 * Note! We must consider the previous entry here so we don't free
4042 * anything that the current threaded function requires (current
4043 * entry is produced by the next threaded function).
4044 */
4045 uint32_t const idxCurCall = pReNative->idxCurCall;
4046 if (idxCurCall > 0)
4047 {
4048 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4049
4050# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4051 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4052 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4053 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
4054#else
4055 /* Construct a mask of the registers not in the read or write state.
4056 Note! We could skip writes, if they aren't from us, as this is just
4057 a hack to prevent trashing registers that have just been written
4058 or will be written when we retire the current instruction. */
4059 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4060 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4061 & IEMLIVENESSBIT_MASK;
4062#endif
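 /* Note: EFLAGS liveness is tracked as seven separate bits (Other, CF, PF,
    AF, ZF, SF, OF) starting at the kIemNativeGstReg_EFlags position; the
    three ANDed shifts below fold them into a single bit so the EFLAGS
    shadow is only freed when every part is unused. */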
4063 /* Merge EFLAGS. */
4064 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4065 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4066 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4067 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4068 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
4069
4070 /* If it matches any shadowed registers. */
4071 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4072 {
4073 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4074 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4075 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4076
4077 /* See if we've got any unshadowed registers we can return now. */
4078 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4079 if (fUnshadowedRegs)
4080 {
4081 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4082 return (fPreferVolatile
4083 ? ASMBitFirstSetU32(fUnshadowedRegs)
4084 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4085 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4086 - 1;
4087 }
4088 }
4089 }
4090#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4091
4092 unsigned const idxReg = (fPreferVolatile
4093 ? ASMBitFirstSetU32(fRegs)
4094 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4095 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4096 - 1;
4097
4098 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4099 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4100 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4101 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4102
4103 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4104 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4105 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4106 return idxReg;
4107 }
4108
4109 /*
4110 * Try free up a variable that's in a register.
4111 *
4112 * We do two rounds here, first evacuating variables we don't need to be
4113 * saved on the stack, then in the second round move things to the stack.
4114 */
4115 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4116 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4117 {
4118 uint32_t fVars = pReNative->Core.bmVars;
4119 while (fVars)
4120 {
4121 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4122 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4123#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4124 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
4125 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit before skipping so the loop makes progress. */
4126#endif
4127
4128 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4129 && (RT_BIT_32(idxReg) & fRegMask)
4130 && ( iLoop == 0
4131 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4132 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4133 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4134 {
4135 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4136 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4137 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4138 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4139 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4140 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4141
4142 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4143 {
4144 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4145 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4146 }
4147
4148 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4149 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4150
4151 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4152 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4153 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4154 return idxReg;
4155 }
4156 fVars &= ~RT_BIT_32(idxVar);
4157 }
4158 }
4159
4160 return UINT8_MAX;
4161}
4162
4163
4164/**
4165 * Reassigns a variable to a different register specified by the caller.
4166 *
4167 * @returns The new code buffer position.
4168 * @param pReNative The native recompile state.
4169 * @param off The current code buffer position.
4170 * @param idxVar The variable index.
4171 * @param idxRegOld The old host register number.
4172 * @param idxRegNew The new host register number.
4173 * @param pszCaller The caller for logging.
4174 */
4175static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4176 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4177{
4178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4179 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4180#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4181 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4182#endif
4183 RT_NOREF(pszCaller);
4184
4185 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4186
4187 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4188 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4189 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4190 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4191
4192 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4193 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4194 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4195 if (fGstRegShadows)
4196 {
4197 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4198 | RT_BIT_32(idxRegNew);
4199 while (fGstRegShadows)
4200 {
4201 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4202 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4203
4204 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4205 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4206 }
4207 }
4208
4209 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4210 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4211 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4212 return off;
4213}
4214
4215
4216/**
4217 * Moves a variable to a different register or spills it onto the stack.
4218 *
4219 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4220 * kinds can easily be recreated if needed later.
4221 *
4222 * @returns The new code buffer position.
4223 * @param pReNative The native recompile state.
4224 * @param off The current code buffer position.
4225 * @param idxVar The variable index.
4226 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4227 * call-volatile registers.
4228 */
4229DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4230 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4231{
4232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4233 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4234 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4235 Assert(!pVar->fRegAcquired);
4236
4237 uint8_t const idxRegOld = pVar->idxReg;
4238 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4239 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4240 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4241 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4242 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4243 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4244 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4245 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4246
4247
4248 /** @todo Add statistics on this.*/
4249 /** @todo Implement basic variable liveness analysis (python) so variables
4250 * can be freed immediately once no longer used. Without it we risk
4251 * trashing registers and stack space on dead variables.
4252 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4253
4254 /*
4255 * First try move it to a different register, as that's cheaper.
4256 */
4257 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4258 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4259 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4260 if (fRegs)
4261 {
4262 /* Avoid using shadow registers, if possible. */
4263 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4264 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4265 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4266 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4267 }
4268
4269 /*
4270 * Otherwise we must spill the register onto the stack.
4271 */
4272 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4273 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4274 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4275 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4276
4277 pVar->idxReg = UINT8_MAX;
4278 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4279 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4282 return off;
4283}
4284
4285
4286/**
4287 * Allocates a temporary host general purpose register.
4288 *
4289 * This may emit code to save register content onto the stack in order to free
4290 * up a register.
4291 *
4292 * @returns The host register number; throws VBox status code on failure,
4293 * so no need to check the return value.
4294 * @param pReNative The native recompile state.
4295 * @param poff Pointer to the variable with the code buffer position.
4296 * This will be update if we need to move a variable from
4297 * register to stack in order to satisfy the request.
4298 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4299 * registers (@c true, default) or the other way around
4300 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4301 */
4302DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4303{
4304 /*
4305 * Try find a completely unused register, preferably a call-volatile one.
4306 */
4307 uint8_t idxReg;
4308 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4309 & ~pReNative->Core.bmHstRegsWithGstShadow
4310 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4311 if (fRegs)
4312 {
4313 if (fPreferVolatile)
4314 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4315 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4316 else
4317 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4318 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4319 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4320 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4321 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4322 }
4323 else
4324 {
4325 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4326 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4327 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4328 }
4329 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4330}
4331
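/*
 * Illustrative sketch (not part of the original source): typical use of the
 * temporary register allocator from an emitter.  The releasing helper named
 * below (iemNativeRegFreeTmp) is assumed to be the counterpart declared
 * elsewhere in this file:
 *
 * @code
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *     // ... emit code using idxTmpReg ...
 *     iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode
 */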
4332
4333/**
4334 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4335 * registers.
4336 *
4337 * @returns The host register number; throws VBox status code on failure,
4338 * so no need to check the return value.
4339 * @param pReNative The native recompile state.
4340 * @param poff Pointer to the variable with the code buffer position.
4341 * This will be updated if we need to move a variable from
4342 * register to stack in order to satisfy the request.
4343 * @param fRegMask Mask of acceptable registers.
4344 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4345 * registers (@c true, default) or the other way around
4346 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4347 */
4348DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4349 bool fPreferVolatile /*= true*/)
4350{
4351 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4352 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4353
4354 /*
4355 * Try find a completely unused register, preferably a call-volatile one.
4356 */
4357 uint8_t idxReg;
4358 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4359 & ~pReNative->Core.bmHstRegsWithGstShadow
4360 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4361 & fRegMask;
4362 if (fRegs)
4363 {
4364 if (fPreferVolatile)
4365 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4366 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4367 else
4368 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4369 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4370 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4371 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4372 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4373 }
4374 else
4375 {
4376 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4377 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4378 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4379 }
4380 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4381}
4382
4383
4384/**
4385 * Allocates a temporary register for loading an immediate value into.
4386 *
4387 * This will emit code to load the immediate, unless there happens to be an
4388 * unused register with the value already loaded.
4389 *
4390 * The caller will not modify the returned register, it must be considered
4391 * read-only. Free using iemNativeRegFreeTmpImm.
4392 *
4393 * @returns The host register number; throws VBox status code on failure, so no
4394 * need to check the return value.
4395 * @param pReNative The native recompile state.
4396 * @param poff Pointer to the variable with the code buffer position.
4397 * @param uImm The immediate value that the register must hold upon
4398 * return.
4399 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4400 * registers (@c true, default) or the other way around
4401 * (@c false).
4402 *
4403 * @note Reusing immediate values has not been implemented yet.
4404 */
4405DECL_HIDDEN_THROW(uint8_t)
4406iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4407{
4408 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4409 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4410 return idxReg;
4411}
4412
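/*
 * Illustrative sketch (not part of the original source): the immediate variant
 * pairs with iemNativeRegFreeTmpImm, per the doc comment above (immediate
 * reuse itself is not implemented yet):
 *
 * @code
 *     uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *     // ... emit code reading (but not modifying) idxRegImm ...
 *     iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 * @endcode
 */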
4413
4414/**
4415 * Allocates a temporary host general purpose register for keeping a guest
4416 * register value.
4417 *
4418 * Since we may already have a register holding the guest register value,
4419 * code will be emitted to do the loading if that's not the case. Code may also
4420 * be emitted if we have to free up a register to satisfy the request.
4421 *
4422 * @returns The host register number; throws VBox status code on failure, so no
4423 * need to check the return value.
4424 * @param pReNative The native recompile state.
4425 * @param poff Pointer to the variable with the code buffer
4426 * position. This will be updated if we need to move a
4427 * variable from register to stack in order to satisfy
4428 * the request.
4429 * @param enmGstReg The guest register that is to be updated.
4430 * @param enmIntendedUse How the caller will be using the host register.
4431 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4432 * register is okay (default). The ASSUMPTION here is
4433 * that the caller has already flushed all volatile
4434 * registers, so this is only applied if we allocate a
4435 * new register.
4436 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4437 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4438 */
4439DECL_HIDDEN_THROW(uint8_t)
4440iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4441 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4442 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4443{
4444 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4445#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4446 AssertMsg( fSkipLivenessAssert
4447 || pReNative->idxCurCall == 0
4448 || enmGstReg == kIemNativeGstReg_Pc
4449 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4450 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4451 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4452 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4453 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4454 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4455#endif
4456 RT_NOREF(fSkipLivenessAssert);
4457#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4458 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4459#endif
4460 uint32_t const fRegMask = !fNoVolatileRegs
4461 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4462 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4463
4464 /*
4465 * First check if the guest register value is already in a host register.
4466 */
4467 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4468 {
4469 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4470 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4471 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4472 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4473
4474 /* It's not supposed to be allocated... */
4475 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4476 {
4477 /*
4478             * If the register will trash the guest shadow copy, try to find a
4479 * completely unused register we can use instead. If that fails,
4480 * we need to disassociate the host reg from the guest reg.
4481 */
4482 /** @todo would be nice to know if preserving the register is in any way helpful. */
4483            /* If the purpose is calculations, try to duplicate the register value as
4484 we'll be clobbering the shadow. */
4485 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4486 && ( ~pReNative->Core.bmHstRegs
4487 & ~pReNative->Core.bmHstRegsWithGstShadow
4488 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4489 {
4490 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4491
4492 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4493
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4496 g_apszIemNativeHstRegNames[idxRegNew]));
4497 idxReg = idxRegNew;
4498 }
4499 /* If the current register matches the restrictions, go ahead and allocate
4500 it for the caller. */
4501 else if (fRegMask & RT_BIT_32(idxReg))
4502 {
4503 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4504 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4505 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4506 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4507 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4508 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4509 else
4510 {
4511 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4512 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4513 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4514 }
4515 }
4516 /* Otherwise, allocate a register that satisfies the caller and transfer
4517 the shadowing if compatible with the intended use. (This basically
4518 means the call wants a non-volatile register (RSP push/pop scenario).) */
4519 else
4520 {
4521 Assert(fNoVolatileRegs);
4522 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4523 !fNoVolatileRegs
4524 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4525 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4526 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4527 {
4528 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4529                Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4530 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4531 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4532 }
4533 else
4534 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4535 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4536 g_apszIemNativeHstRegNames[idxRegNew]));
4537 idxReg = idxRegNew;
4538 }
4539 }
4540 else
4541 {
4542 /*
4543 * Oops. Shadowed guest register already allocated!
4544 *
4545 * Allocate a new register, copy the value and, if updating, the
4546 * guest shadow copy assignment to the new register.
4547 */
4548 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4549 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4550 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4551 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4552
4553 /** @todo share register for readonly access. */
4554 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4555 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4556
4557 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4558 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4559
4560 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4561 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4562 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4563 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4564 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4565 else
4566 {
4567 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4568 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4569 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4570 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4571 }
4572 idxReg = idxRegNew;
4573 }
4574 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4575
4576#ifdef VBOX_STRICT
4577 /* Strict builds: Check that the value is correct. */
4578 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4579#endif
4580
4581 return idxReg;
4582 }
4583
4584 /*
4585     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4586 */
4587 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4588
4589 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4590 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4591
4592 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4593 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4594 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4595 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4596
4597 return idxRegNew;
4598}
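
/*
 * Illustrative usage sketch: fetching the guest PC shadow for an update,
 * emitting whatever modification code the caller needs (placeholder below),
 * and releasing the temporary again.
 *
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying idxPcReg; the shadow association is kept ...
 *      iemNativeRegFreeTmp(pReNative, idxPcReg);
 */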
4599
4600
4601/**
4602 * Allocates a temporary host general purpose register that already holds the
4603 * given guest register value.
4604 *
4605 * The use case for this function is places where the shadowing state cannot be
4606 * modified due to branching and such. This will fail if we don't have a
4607 * current shadow copy handy or if it's incompatible. The only code that will
4608 * be emitted here is value checking code in strict builds.
4609 *
4610 * The intended use can only be readonly!
4611 *
4612 * @returns The host register number, UINT8_MAX if not present.
4613 * @param pReNative The native recompile state.
4614 * @param poff Pointer to the instruction buffer offset.
4615 * Will be updated in strict builds if a register is
4616 * found.
4617 * @param   enmGstReg       The guest register that is to be updated.
4618 * @note In strict builds, this may throw instruction buffer growth failures.
4619 * Non-strict builds will not throw anything.
4620 * @sa iemNativeRegAllocTmpForGuestReg
4621 */
4622DECL_HIDDEN_THROW(uint8_t)
4623iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4624{
4625 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4626#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4627 AssertMsg( pReNative->idxCurCall == 0
4628 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4629 || enmGstReg == kIemNativeGstReg_Pc,
4630 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4631#endif
4632
4633 /*
4634 * First check if the guest register value is already in a host register.
4635 */
4636 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4637 {
4638 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4639 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4640 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4641 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4642
4643 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4644 {
4645 /*
4646 * We only do readonly use here, so easy compared to the other
4647 * variant of this code.
4648 */
4649 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4650 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4651 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4652 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4653 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4654
4655#ifdef VBOX_STRICT
4656 /* Strict builds: Check that the value is correct. */
4657 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4658#else
4659 RT_NOREF(poff);
4660#endif
4661 return idxReg;
4662 }
4663 }
4664
4665 return UINT8_MAX;
4666}
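
/*
 * Illustrative usage sketch: probing for an existing read-only shadow copy and
 * taking a different code path when none is available (the regular allocator
 * cannot be used here when the shadowing state must not change).
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxReg != UINT8_MAX)
 *      {
 *          // ... emit code reading idxReg ...
 *          iemNativeRegFreeTmp(pReNative, idxReg);
 *      }
 *      // else: fall back to a path that doesn't touch the shadowing state.
 */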
4667
4668
4669/**
4670 * Allocates argument registers for a function call.
4671 *
4672 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4673 * need to check the return value.
4674 * @param pReNative The native recompile state.
4675 * @param off The current code buffer offset.
4676 * @param cArgs The number of arguments the function call takes.
4677 */
4678DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4679{
4680 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4681 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4682 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4683 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4684
4685 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4686 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4687 else if (cArgs == 0)
4688        return off;
4689
4690 /*
4691     * Do we get lucky and all the registers are free and not shadowing anything?
4692 */
4693 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4694 for (uint32_t i = 0; i < cArgs; i++)
4695 {
4696 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4697 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4698 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4699 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4700 }
4701 /*
4702     * Okay, not lucky, so we have to free up the registers.
4703 */
4704 else
4705 for (uint32_t i = 0; i < cArgs; i++)
4706 {
4707 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4708 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4709 {
4710 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4711 {
4712 case kIemNativeWhat_Var:
4713 {
4714 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4715 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4716 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4717 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4718 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4719#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4720 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4721#endif
4722
4723 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4724 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4725 else
4726 {
4727 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4728 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4729 }
4730 break;
4731 }
4732
4733 case kIemNativeWhat_Tmp:
4734 case kIemNativeWhat_Arg:
4735 case kIemNativeWhat_rc:
4736 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4737 default:
4738 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4739 }
4740
4741 }
4742 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4743 {
4744 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4745 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4746 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4747 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4748 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4749 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4750 }
4751 else
4752 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4753 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4754 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4755 }
4756 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4757    return off;
4758}
4759
4760
4761DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4762
4763
4764#if 0
4765/**
4766 * Frees a register assignment of any type.
4767 *
4768 * @param pReNative The native recompile state.
4769 * @param idxHstReg The register to free.
4770 *
4771 * @note Does not update variables.
4772 */
4773DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4774{
4775 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4776 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4777 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4778 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4779 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4780 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4781 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4782 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4783 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4784 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4785 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4786 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4787 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4788 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4789
4790 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4791 /* no flushing, right:
4792 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4793 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4794 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4795 */
4796}
4797#endif
4798
4799
4800/**
4801 * Frees a temporary register.
4802 *
4803 * Any shadow copies of guest registers assigned to the host register will not
4804 * be flushed by this operation.
4805 */
4806DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4807{
4808 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4809 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4810 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4811 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4812 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4813}
4814
4815
4816/**
4817 * Frees a temporary immediate register.
4818 *
4819 * It is assumed that the caller has not modified the register, so it still holds
4820 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4821 */
4822DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4823{
4824 iemNativeRegFreeTmp(pReNative, idxHstReg);
4825}
4826
4827
4828/**
4829 * Frees a register assigned to a variable.
4830 *
4831 * The register will be disassociated from the variable.
4832 */
4833DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4834{
4835 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4836 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4837 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4838 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4839 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4840#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4841 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4842#endif
4843
4844 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4845 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4846 if (!fFlushShadows)
4847 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4848 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4849 else
4850 {
4851 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4852 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4853 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4854 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4855 uint64_t fGstRegShadows = fGstRegShadowsOld;
4856 while (fGstRegShadows)
4857 {
4858 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4859 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4860
4861 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4862 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4863 }
4864 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4865 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4866 }
4867}
4868
4869
4870#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4871# ifdef LOG_ENABLED
4872/** Host CPU SIMD register names. */
4873DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4874{
4875# ifdef RT_ARCH_AMD64
4876 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4877# elif defined(RT_ARCH_ARM64)
4878 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4879 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4880# else
4881# error "port me"
4882# endif
4883};
4884# endif
4885
4886
4887/**
4888 * Frees a SIMD register assigned to a variable.
4889 *
4890 * The register will be disassociated from the variable.
4891 */
4892DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4893{
4894 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4895 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4896 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4897 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4898 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4900
4901 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4902 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4903 if (!fFlushShadows)
4904 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4905 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4906 else
4907 {
4908 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4909 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4910 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4911 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4912 uint64_t fGstRegShadows = fGstRegShadowsOld;
4913 while (fGstRegShadows)
4914 {
4915 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4916 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4917
4918 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4919 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4920 }
4921 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4922 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4923 }
4924}
4925#endif
4926
4927
4928/**
4929 * Called right before emitting a call instruction to move anything important
4930 * out of call-volatile registers, free and flush the call-volatile registers,
4931 * optionally freeing argument variables.
4932 *
4933 * @returns New code buffer offset; throws VBox status code on failure.
4934 * @param pReNative The native recompile state.
4935 * @param off The code buffer offset.
4936 * @param cArgs The number of arguments the function call takes.
4937 *                          It is presumed that the host register part of these has
4938 * been allocated as such already and won't need moving,
4939 * just freeing.
4940 * @param fKeepVars Mask of variables that should keep their register
4941 * assignments. Caller must take care to handle these.
4942 */
4943DECL_HIDDEN_THROW(uint32_t)
4944iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4945{
4946 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4947
4948 /* fKeepVars will reduce this mask. */
4949 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4950
4951 /*
4952 * Move anything important out of volatile registers.
4953 */
4954 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4955 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4956 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4957#ifdef IEMNATIVE_REG_FIXED_TMP0
4958 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4959#endif
4960#ifdef IEMNATIVE_REG_FIXED_TMP1
4961 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4962#endif
4963#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4964 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4965#endif
4966 & ~g_afIemNativeCallRegs[cArgs];
4967
4968 fRegsToMove &= pReNative->Core.bmHstRegs;
4969 if (!fRegsToMove)
4970 { /* likely */ }
4971 else
4972 {
4973 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4974 while (fRegsToMove != 0)
4975 {
4976 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4977 fRegsToMove &= ~RT_BIT_32(idxReg);
4978
4979 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4980 {
4981 case kIemNativeWhat_Var:
4982 {
4983 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4984 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4985 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4986 Assert(pVar->idxReg == idxReg);
4987 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4988 {
4989 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4990 idxVar, pVar->enmKind, pVar->idxReg));
4991 if (pVar->enmKind != kIemNativeVarKind_Stack)
4992 pVar->idxReg = UINT8_MAX;
4993 else
4994 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4995 }
4996 else
4997 fRegsToFree &= ~RT_BIT_32(idxReg);
4998 continue;
4999 }
5000
5001 case kIemNativeWhat_Arg:
5002 AssertMsgFailed(("What?!?: %u\n", idxReg));
5003 continue;
5004
5005 case kIemNativeWhat_rc:
5006 case kIemNativeWhat_Tmp:
5007 AssertMsgFailed(("Missing free: %u\n", idxReg));
5008 continue;
5009
5010 case kIemNativeWhat_FixedTmp:
5011 case kIemNativeWhat_pVCpuFixed:
5012 case kIemNativeWhat_pCtxFixed:
5013 case kIemNativeWhat_PcShadow:
5014 case kIemNativeWhat_FixedReserved:
5015 case kIemNativeWhat_Invalid:
5016 case kIemNativeWhat_End:
5017 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5018 }
5019 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5020 }
5021 }
5022
5023 /*
5024 * Do the actual freeing.
5025 */
5026 if (pReNative->Core.bmHstRegs & fRegsToFree)
5027 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5028 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5029 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5030
5031 /* If there are guest register shadows in any call-volatile register, we
5032       have to clear the corresponding guest register masks for each register. */
5033 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5034 if (fHstRegsWithGstShadow)
5035 {
5036 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5037 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5038 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5039 do
5040 {
5041 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5042 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5043
5044 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5045 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5046 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5047 } while (fHstRegsWithGstShadow != 0);
5048 }
5049
5050 return off;
5051}
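
/*
 * Illustrative usage sketch of the typical bracket around a helper call,
 * assuming all pending guest register writes have been flushed already:
 *
 *      // Move/free/flush the call-volatile registers (cArgs already set up as arguments):
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
 *      // ... load remaining arguments and emit the actual call ...
 *      // Reload guest shadows that were living in volatile registers:
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */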
5052
5053
5054/**
5055 * Flushes a set of guest register shadow copies.
5056 *
5057 * This is usually done after calling a threaded function or a C-implementation
5058 * of an instruction.
5059 *
5060 * @param pReNative The native recompile state.
5061 * @param fGstRegs Set of guest registers to flush.
5062 */
5063DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5064{
5065 /*
5066 * Reduce the mask by what's currently shadowed
5067 */
5068 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5069 fGstRegs &= bmGstRegShadowsOld;
5070 if (fGstRegs)
5071 {
5072 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5073 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5074 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5075 if (bmGstRegShadowsNew)
5076 {
5077 /*
5078 * Partial.
5079 */
5080 do
5081 {
5082 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5083 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5084 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5085 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5086 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5087
5088 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5089 fGstRegs &= ~fInThisHstReg;
5090 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5091 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5092 if (!fGstRegShadowsNew)
5093 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5094 } while (fGstRegs != 0);
5095 }
5096 else
5097 {
5098 /*
5099 * Clear all.
5100 */
5101 do
5102 {
5103 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5104 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5105 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5106 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5107 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5108
5109 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5110 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5111 } while (fGstRegs != 0);
5112 pReNative->Core.bmHstRegsWithGstShadow = 0;
5113 }
5114 }
5115}
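
/*
 * Illustrative usage sketch: after emitting a call to a C helper that may have
 * modified the guest PC, the now stale shadow copy is dropped so the next
 * access reloads it from CPUMCTX:
 *
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
 */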
5116
5117
5118/**
5119 * Flushes guest register shadow copies held by a set of host registers.
5120 *
5121 * This is used with the TLB lookup code for ensuring that we don't carry on
5122 * with any guest shadows in volatile registers, as these will get corrupted by
5123 * a TLB miss.
5124 *
5125 * @param pReNative The native recompile state.
5126 * @param fHstRegs Set of host registers to flush guest shadows for.
5127 */
5128DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5129{
5130 /*
5131 * Reduce the mask by what's currently shadowed.
5132 */
5133 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5134 fHstRegs &= bmHstRegsWithGstShadowOld;
5135 if (fHstRegs)
5136 {
5137 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5138 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5139 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5140 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5141 if (bmHstRegsWithGstShadowNew)
5142 {
5143 /*
5144 * Partial (likely).
5145 */
5146 uint64_t fGstShadows = 0;
5147 do
5148 {
5149 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5150 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5151 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5152 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5153
5154 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5155 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5156 fHstRegs &= ~RT_BIT_32(idxHstReg);
5157 } while (fHstRegs != 0);
5158 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5159 }
5160 else
5161 {
5162 /*
5163 * Clear all.
5164 */
5165 do
5166 {
5167 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5168 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5169 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5170 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5171
5172 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5173 fHstRegs &= ~RT_BIT_32(idxHstReg);
5174 } while (fHstRegs != 0);
5175 pReNative->Core.bmGstRegShadows = 0;
5176 }
5177 }
5178}
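
/*
 * Illustrative usage sketch: before an inlined TLB lookup, drop all guest
 * shadows living in call-volatile host registers, since the TLB-miss path will
 * clobber them:
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */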
5179
5180
5181/**
5182 * Restores guest shadow copies in volatile registers.
5183 *
5184 * This is used after calling a helper function (think TLB miss) to restore the
5185 * register state of volatile registers.
5186 *
5187 * @param pReNative The native recompile state.
5188 * @param off The code buffer offset.
5189 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5190 * be active (allocated) w/o asserting. Hack.
5191 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5192 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5193 */
5194DECL_HIDDEN_THROW(uint32_t)
5195iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5196{
5197 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5198 if (fHstRegs)
5199 {
5200 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5201 do
5202 {
5203 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5204
5205            /* It's not fatal if a register is active holding a variable that is
5206               shadowing a guest register, ASSUMING all pending guest register
5207               writes were flushed prior to the helper call. However, we'll be
5208               emitting duplicate restores, so it wastes code space. */
5209 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5210 RT_NOREF(fHstRegsActiveShadows);
5211
5212 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5213 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5214 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5215 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5216
5217 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5218 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5219
5220 fHstRegs &= ~RT_BIT_32(idxHstReg);
5221 } while (fHstRegs != 0);
5222 }
5223 return off;
5224}
5225
5226
5227
5228
5229/*********************************************************************************************************************************
5230* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5231*********************************************************************************************************************************/
5232#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5233
5234/**
5235 * Info about shadowed guest SIMD register values.
5236 * @see IEMNATIVEGSTSIMDREG
5237 */
5238static struct
5239{
5240 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5241 uint32_t offXmm;
5242 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5243 uint32_t offYmm;
5244 /** Name (for logging). */
5245 const char *pszName;
5246} const g_aGstSimdShadowInfo[] =
5247{
5248#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5249 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5250 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5251 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5252 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5253 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5254 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5255 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5256 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5257 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5258 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5259 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5260 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5261 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5262 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5263 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5264 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5265 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5266#undef CPUMCTX_OFF_AND_SIZE
5267};
5268AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5269
5270
5271/**
5272 * Frees a temporary SIMD register.
5273 *
5274 * Any shadow copies of guest registers assigned to the host register will not
5275 * be flushed by this operation.
5276 */
5277DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5278{
5279 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5280 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5281 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5282 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5283 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5284}
5285
5286
5287/**
5288 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5289 *
5290 * @returns New code buffer offset.
5291 * @param pReNative The native recompile state.
5292 * @param off Current code buffer position.
5293 * @param enmGstSimdReg The guest SIMD register to flush.
5294 */
5295DECL_HIDDEN_THROW(uint32_t)
5296iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5297{
5298 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5299
5300 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5301 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5302 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5303 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5304
5305 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5306 {
5307 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5308 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5309 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5310 }
5311
5312 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5313 {
5314 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5315 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5316 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5317 }
5318
5319 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5320 return off;
5321}
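
/*
 * Illustrative usage sketch: writing back a dirty guest ymm0 shadow before
 * emitting code that accesses the value directly in CPUMCTX:
 *
 *      off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
 */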
5322
5323
5324/**
5325 * Locate a register, possibly freeing one up.
5326 *
5327 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5328 * failed.
5329 *
5330 * @returns Host register number on success. Returns UINT8_MAX if no registers
5331 *          are found; the caller is supposed to deal with this and raise an
5332 *          allocation type specific status code (if desired).
5333 *
5334 * @throws  VBox status code if we run into trouble spilling a variable or
5335 * recording debug info. Does NOT throw anything if we're out of
5336 * registers, though.
5337 */
5338static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5339 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5340{
5341 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5342 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5343 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5344
5345 /*
5346 * Try a freed register that's shadowing a guest register.
5347 */
5348 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5349 if (fRegs)
5350 {
5351 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5352
5353#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5354 /*
5355         * When we have liveness information, we use it to kick out all shadowed
5356         * guest registers that will not be needed any more in this TB. If we're
5357 * lucky, this may prevent us from ending up here again.
5358 *
5359 * Note! We must consider the previous entry here so we don't free
5360 * anything that the current threaded function requires (current
5361 * entry is produced by the next threaded function).
5362 */
5363 uint32_t const idxCurCall = pReNative->idxCurCall;
5364 if (idxCurCall > 0)
5365 {
5366 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5367
5368# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5369 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5370 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5371 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5372#else
5373 /* Construct a mask of the registers not in the read or write state.
5374           Note! We could skip writes, if they aren't from us, as this is just
5375 a hack to prevent trashing registers that have just been written
5376 or will be written when we retire the current instruction. */
5377 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5378 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5379 & IEMLIVENESSBIT_MASK;
5380#endif
5381 /* If it matches any shadowed registers. */
5382 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5383 {
5384 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5385 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5386 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5387
5388 /* See if we've got any unshadowed registers we can return now. */
5389 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5390 if (fUnshadowedRegs)
5391 {
5392 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5393 return (fPreferVolatile
5394 ? ASMBitFirstSetU32(fUnshadowedRegs)
5395 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5396 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5397 - 1;
5398 }
5399 }
5400 }
5401#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5402
5403 unsigned const idxReg = (fPreferVolatile
5404 ? ASMBitFirstSetU32(fRegs)
5405 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5406 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5407 - 1;
5408
5409 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5410 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5411 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5412 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5413
5414 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5415 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5416 uint32_t idxGstSimdReg = 0;
5417 do
5418 {
5419 if (fGstRegShadows & 0x1)
5420 {
5421 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5422 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5423 }
5424 idxGstSimdReg++;
5425 fGstRegShadows >>= 1;
5426 } while (fGstRegShadows);
5427
5428 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5429 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5430 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5431 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5432 return idxReg;
5433 }
5434
5435 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5436
5437 /*
5438     * Try to free up a variable that's in a register.
5439 *
5440     * We do two rounds here: first evacuating variables that don't need to be
5441     * saved on the stack, then in the second round moving things to the stack.
5442 */
5443 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5444 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5445 {
5446 uint32_t fVars = pReNative->Core.bmVars;
5447 while (fVars)
5448 {
5449 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5450 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5451            if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
5452            {
                    fVars &= ~RT_BIT_32(idxVar); /* Must clear the bit here too, or the loop would spin on this variable forever. */
                    continue;
                }
5453
5454 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5455 && (RT_BIT_32(idxReg) & fRegMask)
5456 && ( iLoop == 0
5457 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5458 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5459 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5460 {
5461 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5462 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5463 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5464 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5465 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5466 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5467
5468 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5469 {
5470 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5471 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5472 }
5473
5474 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5475 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5476
5477 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5478                pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5479 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5480 return idxReg;
5481 }
5482 fVars &= ~RT_BIT_32(idxVar);
5483 }
5484 }
5485
5486 AssertFailed();
5487 return UINT8_MAX;
5488}
5489
5490
5491/**
5492 * Flushes a set of guest register shadow copies.
5493 *
5494 * This is usually done after calling a threaded function or a C-implementation
5495 * of an instruction.
5496 *
5497 * @param pReNative The native recompile state.
5498 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5499 */
5500DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5501{
5502 /*
5503 * Reduce the mask by what's currently shadowed
5504 */
5505 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5506 fGstSimdRegs &= bmGstSimdRegShadows;
5507 if (fGstSimdRegs)
5508 {
5509 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5510 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5511 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5512 if (bmGstSimdRegShadowsNew)
5513 {
5514 /*
5515 * Partial.
5516 */
5517 do
5518 {
5519 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5520 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5521 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5522 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5523 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5524 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5525
5526 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5527 fGstSimdRegs &= ~fInThisHstReg;
5528 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5529 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5530 if (!fGstRegShadowsNew)
5531 {
5532 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5533 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5534 }
5535 } while (fGstSimdRegs != 0);
5536 }
5537 else
5538 {
5539 /*
5540 * Clear all.
5541 */
5542 do
5543 {
5544 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5545 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5546 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5547 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5548 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5549 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5550
5551 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5552 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5553 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5554 } while (fGstSimdRegs != 0);
5555 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5556 }
5557 }
5558}
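
/*
 * Illustrative usage sketch: dropping the shadow of a single guest SIMD
 * register after a helper call that may have modified it:
 *
 *      iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(1)));
 */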
5559
5560
5561/**
5562 * Allocates a temporary host SIMD register.
5563 *
5564 * This may emit code to save register content onto the stack in order to free
5565 * up a register.
5566 *
5567 * @returns The host register number; throws VBox status code on failure,
5568 * so no need to check the return value.
5569 * @param pReNative The native recompile state.
5570 * @param poff Pointer to the variable with the code buffer position.
5571 *                          This will be updated if we need to move a variable from
5572 * register to stack in order to satisfy the request.
5573 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5574 * registers (@c true, default) or the other way around
5575 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5576 */
5577DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5578{
5579 /*
5580     * Try to find a completely unused register, preferably a call-volatile one.
5581 */
5582 uint8_t idxSimdReg;
5583    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5584                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5585 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5586 if (fRegs)
5587 {
5588 if (fPreferVolatile)
5589 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5590 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5591 else
5592 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5593 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5594 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5595 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5596 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5597 }
5598 else
5599 {
5600 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5601 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5602 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5603 }
5604
5605 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5606 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5607}
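
/*
 * Illustrative usage sketch: grabbing a scratch SIMD register for an
 * intermediate result and releasing it again:
 *
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit SIMD code using idxSimdTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */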
5608
5609
5610/**
5611 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5612 * registers.
5613 *
5614 * @returns The host register number; throws VBox status code on failure,
5615 * so no need to check the return value.
5616 * @param pReNative The native recompile state.
5617 * @param poff Pointer to the variable with the code buffer position.
5618 *                          This will be updated if we need to move a variable from
5619 * register to stack in order to satisfy the request.
5620 * @param fRegMask Mask of acceptable registers.
5621 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5622 * registers (@c true, default) or the other way around
5623 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5624 */
5625DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5626 bool fPreferVolatile /*= true*/)
5627{
5628 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5629 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5630
5631 /*
5632     * Try to find a completely unused register, preferably a call-volatile one.
5633 */
5634 uint8_t idxSimdReg;
5635 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5636 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5637 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5638 & fRegMask;
5639 if (fRegs)
5640 {
5641 if (fPreferVolatile)
5642 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5643 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5644 else
5645 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5646 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5647 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5648 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5649 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5650 }
5651 else
5652 {
5653 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5654 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5655 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5656 }
5657
5658 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5659 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5660}
5661
5662
5663/**
5664 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5665 *
5666 * @param pReNative The native recompile state.
5667 * @param idxHstSimdReg The host SIMD register to update the state for.
5668 * @param enmLoadSz The load size to set.
5669 */
5670DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5671 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5672{
5673 /* Everything valid already? -> nothing to do. */
5674 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5675 return;
5676
5677 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5678 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5679 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5680 {
5681 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5682 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5683 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5684 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5685 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5686 }
5687}
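
/*
 * Summary of the state transitions performed above:
 *      Invalid  + any size             -> that size
 *      Low128   + High128 (and v.v.)   -> 256
 *      256      + anything             -> 256 (early return)
 *      size X   + size X               -> unchanged
 * Any other combination trips the assertion.
 */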
5688
5689
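/**
 * Emits code to copy a guest SIMD register value from one host SIMD register
 * to another for the given destination load size.
 *
 * ASSUMES the source register already has (at least) the requested range
 * loaded; the other cases are not implemented yet and will assert.
 */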
5690static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5691 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5692{
5693    /* Easy case first: either the destination loads the same range as the source has already loaded, or the source has loaded everything. */
5694 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5695 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5696 {
5697# ifdef RT_ARCH_ARM64
5698 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5699 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5700# endif
5701
5702 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5703 {
5704 switch (enmLoadSzDst)
5705 {
5706 case kIemNativeGstSimdRegLdStSz_256:
5707 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5708 break;
5709 case kIemNativeGstSimdRegLdStSz_Low128:
5710 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5711 break;
5712 case kIemNativeGstSimdRegLdStSz_High128:
5713 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5714 break;
5715 default:
5716 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5717 }
5718
5719 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5720 }
5721 }
5722 else
5723 {
5724 /* Complicated stuff where the source is currently missing something, later. */
5725 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5726 }
5727
5728 return off;
5729}
5730
5731
5732/**
5733 * Allocates a temporary host SIMD register for keeping a guest
5734 * SIMD register value.
5735 *
5736 * Since we may already have a register holding the guest register value,
5737 * code will be emitted to do the loading if that's not the case. Code may also
5738 * be emitted if we have to free up a register to satisfy the request.
5739 *
5740 * @returns The host register number; throws VBox status code on failure, so no
5741 * need to check the return value.
5742 * @param pReNative The native recompile state.
5743 * @param poff Pointer to the variable with the code buffer
5744 * position. This will be updated if we need to move a
5745 * variable from register to stack in order to satisfy
5746 * the request.
5747 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5748 * @param enmIntendedUse How the caller will be using the host register.
5749 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5750 * register is okay (default). The ASSUMPTION here is
5751 * that the caller has already flushed all volatile
5752 * registers, so this is only applied if we allocate a
5753 * new register.
5754 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5755 */
5756DECL_HIDDEN_THROW(uint8_t)
5757iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5758 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5759 bool fNoVolatileRegs /*= false*/)
5760{
5761 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5762#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5763 AssertMsg( pReNative->idxCurCall == 0
5764 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5765 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5766 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5767 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5768 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5769 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5770#endif
5771#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5772 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5773#endif
5774 uint32_t const fRegMask = !fNoVolatileRegs
5775 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5776 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5777
5778 /*
5779 * First check if the guest register value is already in a host register.
5780 */
5781 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5782 {
5783 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5784 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5785 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5786 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5787
5788 /* It's not supposed to be allocated... */
5789 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5790 {
5791 /*
5792 * If the register will trash the guest shadow copy, try find a
5793 * completely unused register we can use instead. If that fails,
5794 * we need to disassociate the host reg from the guest reg.
5795 */
5796 /** @todo would be nice to know if preserving the register is in any way helpful. */
5797 /* If the purpose is calculations, try to duplicate the register value as
5798 we'll be clobbering the shadow. */
5799 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5800 && ( ~pReNative->Core.bmHstSimdRegs
5801 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5802 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5803 {
5804 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5805
5806 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5807
5808 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5809 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5810 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5811 idxSimdReg = idxRegNew;
5812 }
5813 /* If the current register matches the restrictions, go ahead and allocate
5814 it for the caller. */
5815 else if (fRegMask & RT_BIT_32(idxSimdReg))
5816 {
5817 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5818 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5819 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5820 {
5821 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5822 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5823 else
5824 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5825 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5826 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5827 }
5828 else
5829 {
5830 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5831 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5832 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5833 }
5834 }
5835 /* Otherwise, allocate a register that satisfies the caller and transfer
5836 the shadowing if compatible with the intended use. (This basically
5837 means the call wants a non-volatile register (RSP push/pop scenario).) */
5838 else
5839 {
5840 Assert(fNoVolatileRegs);
5841 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5842 !fNoVolatileRegs
5843 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5844 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5845 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5846 {
5847 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5848 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
5849 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5850 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5851 }
5852 else
5853 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5854 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5855 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5856 idxSimdReg = idxRegNew;
5857 }
5858 }
5859 else
5860 {
5861 /*
5862 * Oops. Shadowed guest register already allocated!
5863 *
5864 * Allocate a new register, copy the value and, if updating, the
5865 * guest shadow copy assignment to the new register.
5866 */
5867 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5868 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5869 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5870 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5871
5872 /** @todo share register for readonly access. */
5873 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5874 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5875
5876 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5877 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5878 else
5879 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5880
5881 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5882 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5883 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5884 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5885 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5886 else
5887 {
5888 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5889 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5890 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5891 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5892 }
5893 idxSimdReg = idxRegNew;
5894 }
5895 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5896
5897#ifdef VBOX_STRICT
5898 /* Strict builds: Check that the value is correct. */
5899 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5900 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5901#endif
5902
5903 return idxSimdReg;
5904 }
5905
5906 /*
5907 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5908 */
5909 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5910
5911 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5912 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5913 else
5914 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5915
5916 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5917 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5918
5919 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5920 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5921
5922 return idxRegNew;
5923}
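/*
 * Typical usage sketch (the exact surrounding code varies by caller): to get guest
 * XMM/YMM register 1 into a host SIMD register for read-only use, assuming pReNative
 * and off are the usual recompiler state and code buffer offset, one would do roughly:
 *
 *     uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                           IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                           kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                           kIemNativeGstRegUse_ReadOnly);
 *     ... emit code using idxHstSimdReg ...
 *
 * The temporary is released again via the SIMD variant of the register free helpers.
 */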
5924
5925#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5926
5927
5928
5929/*********************************************************************************************************************************
5930* Code emitters for flushing pending guest register writes and sanity checks *
5931*********************************************************************************************************************************/
5932
5933#ifdef VBOX_STRICT
5934/**
5935 * Does internal register allocator sanity checks.
5936 */
5937DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5938{
5939 /*
5940 * Iterate host registers building a guest shadowing set.
5941 */
5942 uint64_t bmGstRegShadows = 0;
5943 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5944 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5945 while (bmHstRegsWithGstShadow)
5946 {
5947 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5948 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5949 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5950
5951 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5952 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5953 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5954 bmGstRegShadows |= fThisGstRegShadows;
5955 while (fThisGstRegShadows)
5956 {
5957 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5958 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5959 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5960 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5961 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5962 }
5963 }
5964 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5965 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5966 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5967
5968 /*
5969 * Now the other way around, checking the guest to host index array.
5970 */
5971 bmHstRegsWithGstShadow = 0;
5972 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5973 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5974 while (bmGstRegShadows)
5975 {
5976 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5977 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5978 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5979
5980 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5981 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5982 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5983 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5984 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5985 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5986 }
5987 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5988 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5989 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5990}
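/*
 * In plain C terms the two passes above assert this invariant (sketch):
 *
 *     for each host reg h in bmHstRegsWithGstShadow:
 *         for each guest reg g in aHstRegs[h].fGstRegShadows:
 *             aidxGstRegShadows[g] == h
 *     union of all fGstRegShadows == bmGstRegShadows
 *
 * and, going the other way, every bit in bmGstRegShadows maps back to a host
 * register whose fGstRegShadows contains it.
 */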
5991#endif /* VBOX_STRICT */
5992
5993
5994/**
5995 * Flushes any delayed guest register writes.
5996 *
5997 * This must be called prior to calling CImpl functions and any helpers that use
5998 * the guest state (like raising exceptions) and such.
5999 *
6000 * Currently only delayed RIP updates (IEMNATIVE_WITH_DELAYED_PC_UPDATING) and
6001 * dirty SIMD registers (IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) are handled here.
6002 */
6003DECL_HIDDEN_THROW(uint32_t)
6004iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
6005{
6006#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6007 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
6008 off = iemNativeEmitPcWriteback(pReNative, off);
6009#else
6010 RT_NOREF(pReNative, fGstShwExcept);
6011#endif
6012
6013#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6014 /** @todo r=bird: There must be a quicker way to check if anything needs
6015 * doing and then call the SIMD function to do the flushing. */
6016 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6017 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6018 {
6019 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6020 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6021
6022 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6023 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
6024
6025 if ( fFlushShadows
6026 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6027 {
6028 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6029
6030 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6031 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6032 }
6033 }
6034#else
6035 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6036#endif
6037
6038 return off;
6039}
6040
6041
6042#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6043/**
6044 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6045 */
6046DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6047{
6048 Assert(pReNative->Core.offPc);
6049# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6050 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6051 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6052# endif
6053
6054# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6055 /* Allocate a temporary PC register. */
6056 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6057
6058 /* Perform the addition and store the result. */
6059 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6060 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6061
6062 /* Free but don't flush the PC register. */
6063 iemNativeRegFreeTmp(pReNative, idxPcReg);
6064# else
6065 /* Compare the shadow with the context value, they should match. */
6066 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6067 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6068# endif
6069
6070 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6071 pReNative->Core.offPc = 0;
6072 pReNative->Core.cInstrPcUpdateSkipped = 0;
6073
6074 return off;
6075}
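/*
 * Conceptually the emitted code performs (sketch in plain C):
 *
 *     pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;
 *
 * i.e. the instruction lengths skipped since the last writeback are applied in one
 * go, after which offPc and cInstrPcUpdateSkipped are reset to zero.
 */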
6076#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6077
6078
6079/*********************************************************************************************************************************
6080* Code Emitters (larger snippets) *
6081*********************************************************************************************************************************/
6082
6083/**
6084 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6085 * extending to 64-bit width.
6086 *
6087 * @returns New code buffer offset on success, UINT32_MAX on failure.
6088 * @param pReNative The native recompile state.
6089 * @param off The current code buffer position.
6090 * @param idxHstReg The host register to load the guest register value into.
6091 * @param enmGstReg The guest register to load.
6092 *
6093 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6094 * that is something the caller needs to do if applicable.
6095 */
6096DECL_HIDDEN_THROW(uint32_t)
6097iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6098{
6099 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6100 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6101
6102 switch (g_aGstShadowInfo[enmGstReg].cb)
6103 {
6104 case sizeof(uint64_t):
6105 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6106 case sizeof(uint32_t):
6107 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6108 case sizeof(uint16_t):
6109 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6110#if 0 /* not present in the table. */
6111 case sizeof(uint8_t):
6112 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6113#endif
6114 default:
6115 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6116 }
6117}
6118
6119
6120#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6121/**
6122 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6123 *
6124 * @returns New code buffer offset on success, UINT32_MAX on failure.
6125 * @param pReNative The recompiler state.
6126 * @param off The current code buffer position.
6127 * @param idxHstSimdReg The host register to load the guest register value into.
6128 * @param enmGstSimdReg The guest register to load.
6129 * @param enmLoadSz The load size of the register.
6130 *
6131 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6132 * that is something the caller needs to do if applicable.
6133 */
6134DECL_HIDDEN_THROW(uint32_t)
6135iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6136 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6137{
6138 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6139
6140 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6141 switch (enmLoadSz)
6142 {
6143 case kIemNativeGstSimdRegLdStSz_256:
6144 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6145 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6146 case kIemNativeGstSimdRegLdStSz_Low128:
6147 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6148 case kIemNativeGstSimdRegLdStSz_High128:
6149 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6150 default:
6151 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6152 }
6153}
6154#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6155
6156#ifdef VBOX_STRICT
6157
6158/**
6159 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6160 *
6161 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6162 * Trashes EFLAGS on AMD64.
6163 */
6164DECL_HIDDEN_THROW(uint32_t)
6165iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6166{
6167# ifdef RT_ARCH_AMD64
6168 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6169
6170 /* rol reg64, 32 */
6171 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6172 pbCodeBuf[off++] = 0xc1;
6173 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6174 pbCodeBuf[off++] = 32;
6175
6176 /* test reg32, ffffffffh */
6177 if (idxReg >= 8)
6178 pbCodeBuf[off++] = X86_OP_REX_B;
6179 pbCodeBuf[off++] = 0xf7;
6180 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6181 pbCodeBuf[off++] = 0xff;
6182 pbCodeBuf[off++] = 0xff;
6183 pbCodeBuf[off++] = 0xff;
6184 pbCodeBuf[off++] = 0xff;
6185
6186 /* je/jz +1 */
6187 pbCodeBuf[off++] = 0x74;
6188 pbCodeBuf[off++] = 0x01;
6189
6190 /* int3 */
6191 pbCodeBuf[off++] = 0xcc;
6192
6193 /* rol reg64, 32 */
6194 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6195 pbCodeBuf[off++] = 0xc1;
6196 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6197 pbCodeBuf[off++] = 32;
6198
6199# elif defined(RT_ARCH_ARM64)
6200 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6201 /* lsr tmp0, reg64, #32 */
6202 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6203 /* cbz tmp0, +1 */
6204 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6205 /* brk #0x1100 */
6206 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6207
6208# else
6209# error "Port me!"
6210# endif
6211 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6212 return off;
6213}
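/*
 * The emitted sequence is equivalent to this C sketch: trap into the debugger
 * whenever the upper half of the 64-bit register is not zero.
 *
 *     if ((uint64_t)uReg >> 32 != 0)
 *         breakpoint();   // int3 on AMD64, brk #0x1100 on ARM64
 */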
6214
6215
6216/**
6217 * Emitting code that checks that the content of register @a idxReg is the same
6218 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6219 * instruction if that's not the case.
6220 *
6221 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6222 * Trashes EFLAGS on AMD64.
6223 */
6224DECL_HIDDEN_THROW(uint32_t)
6225iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6226{
6227# ifdef RT_ARCH_AMD64
6228 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6229
6230 /* cmp reg, [mem] */
6231 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6232 {
6233 if (idxReg >= 8)
6234 pbCodeBuf[off++] = X86_OP_REX_R;
6235 pbCodeBuf[off++] = 0x38;
6236 }
6237 else
6238 {
6239 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6240 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6241 else
6242 {
6243 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6244 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6245 else
6246 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6247 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6248 if (idxReg >= 8)
6249 pbCodeBuf[off++] = X86_OP_REX_R;
6250 }
6251 pbCodeBuf[off++] = 0x39;
6252 }
6253 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6254
6255 /* je/jz +1 */
6256 pbCodeBuf[off++] = 0x74;
6257 pbCodeBuf[off++] = 0x01;
6258
6259 /* int3 */
6260 pbCodeBuf[off++] = 0xcc;
6261
6262 /* For values smaller than the register size, we must check that the rest
6263 of the register is all zeros. */
6264 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6265 {
6266 /* test reg64, imm32 */
6267 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6268 pbCodeBuf[off++] = 0xf7;
6269 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6270 pbCodeBuf[off++] = 0;
6271 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6272 pbCodeBuf[off++] = 0xff;
6273 pbCodeBuf[off++] = 0xff;
6274
6275 /* je/jz +1 */
6276 pbCodeBuf[off++] = 0x74;
6277 pbCodeBuf[off++] = 0x01;
6278
6279 /* int3 */
6280 pbCodeBuf[off++] = 0xcc;
6281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6282 }
6283 else
6284 {
6285 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6286 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6287 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6288 }
6289
6290# elif defined(RT_ARCH_ARM64)
6291 /* mov TMP0, [gstreg] */
6292 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6293
6294 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6295 /* sub tmp0, tmp0, idxReg */
6296 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6297 /* cbz tmp0, +1 */
6298 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6299 /* brk #0x1000+enmGstReg */
6300 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6301 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6302
6303# else
6304# error "Port me!"
6305# endif
6306 return off;
6307}
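/*
 * Equivalent C sketch of the check emitted above: compare the host register against
 * the corresponding CPUMCTX field and trap on mismatch.
 *
 *     if (uHstReg != *(pVCpu_field_of(enmGstReg)))   // cmp + je/jz
 *         breakpoint();                              // int3 / brk #0x1000+enmGstReg
 *
 * (pVCpu_field_of() is just shorthand for the g_aGstShadowInfo[enmGstReg].off based
 * access, not a real helper.)
 */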
6308
6309
6310# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6311# ifdef RT_ARCH_AMD64
6312/**
6313 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6314 */
6315DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6316{
6317 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6318 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6319 if (idxSimdReg >= 8)
6320 pbCodeBuf[off++] = X86_OP_REX_R;
6321 pbCodeBuf[off++] = 0x0f;
6322 pbCodeBuf[off++] = 0x38;
6323 pbCodeBuf[off++] = 0x29;
6324 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6325
6326 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6327 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6328 pbCodeBuf[off++] = X86_OP_REX_W
6329 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6330 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6331 pbCodeBuf[off++] = 0x0f;
6332 pbCodeBuf[off++] = 0x3a;
6333 pbCodeBuf[off++] = 0x16;
6334 pbCodeBuf[off++] = 0xeb;
6335 pbCodeBuf[off++] = 0x00;
6336
6337 /* cmp tmp0, 0xffffffffffffffff. */
6338 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6339 pbCodeBuf[off++] = 0x83;
6340 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6341 pbCodeBuf[off++] = 0xff;
6342
6343 /* je/jz +1 */
6344 pbCodeBuf[off++] = 0x74;
6345 pbCodeBuf[off++] = 0x01;
6346
6347 /* int3 */
6348 pbCodeBuf[off++] = 0xcc;
6349
6350 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6351 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6352 pbCodeBuf[off++] = X86_OP_REX_W
6353 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6354 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6355 pbCodeBuf[off++] = 0x0f;
6356 pbCodeBuf[off++] = 0x3a;
6357 pbCodeBuf[off++] = 0x16;
6358 pbCodeBuf[off++] = 0xeb;
6359 pbCodeBuf[off++] = 0x01;
6360
6361 /* cmp tmp0, 0xffffffffffffffff. */
6362 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6363 pbCodeBuf[off++] = 0x83;
6364 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6365 pbCodeBuf[off++] = 0xff;
6366
6367 /* je/jz +1 */
6368 pbCodeBuf[off++] = 0x74;
6369 pbCodeBuf[off++] = 0x01;
6370
6371 /* int3 */
6372 pbCodeBuf[off++] = 0xcc;
6373
6374 return off;
6375}
6376# endif
6377
6378
6379/**
6380 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6381 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6382 * instruction if that's not the case.
6383 *
6384 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6385 * Trashes EFLAGS on AMD64.
6386 */
6387DECL_HIDDEN_THROW(uint32_t)
6388iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6389 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6390{
6391 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6392 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6393 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6394 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6395 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6396 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6397 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6398 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6399 return off;
6400
6401# ifdef RT_ARCH_AMD64
6402 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6403 {
6404 /* movdqa vectmp0, idxSimdReg */
6405 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6406
6407 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6408
6409 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6410 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6411 }
6412
6413 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6414 {
6415 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6416 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6417
6418 /* vextracti128 vectmp0, idxSimdReg, 1 */
6419 pbCodeBuf[off++] = X86_OP_VEX3;
6420 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6421 | X86_OP_VEX3_BYTE1_X
6422 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6423 | 0x03; /* Opcode map */
6424 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6425 pbCodeBuf[off++] = 0x39;
6426 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6427 pbCodeBuf[off++] = 0x01;
6428
6429 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6430 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6431 }
6432# elif defined(RT_ARCH_ARM64)
6433 /* mov vectmp0, [gstreg] */
6434 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6435
6436 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6437 {
6438 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6439 /* eor vectmp0, vectmp0, idxSimdReg */
6440 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6441 /* cnt vectmp0, vectmp0, #0*/
6442 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6443 /* umov tmp0, vectmp0.D[0] */
6444 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6445 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6446 /* cbz tmp0, +1 */
6447 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6448 /* brk #0x1000+enmGstReg */
6449 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6450 }
6451
6452 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6453 {
6454 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6455 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6456 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6457 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6458 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6459 /* umov tmp0, (vectmp0 + 1).D[0] */
6460 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6461 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6462 /* cbz tmp0, +1 */
6463 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6464 /* brk #0x1000+enmGstReg */
6465 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6466 }
6467
6468# else
6469# error "Port me!"
6470# endif
6471
6472 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6473 return off;
6474}
6475# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6476
6477
6478/**
6479 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6480 * important bits.
6481 *
6482 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6483 * Trashes EFLAGS on AMD64.
6484 */
6485DECL_HIDDEN_THROW(uint32_t)
6486iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6487{
6488 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6489 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6490 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6491 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6492
6493#ifdef RT_ARCH_AMD64
6494 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6495
6496 /* je/jz +1 */
6497 pbCodeBuf[off++] = 0x74;
6498 pbCodeBuf[off++] = 0x01;
6499
6500 /* int3 */
6501 pbCodeBuf[off++] = 0xcc;
6502
6503# elif defined(RT_ARCH_ARM64)
6504 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6505
6506 /* b.eq +1 */
6507 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6508 /* brk #0x2000 */
6509 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6510
6511# else
6512# error "Port me!"
6513# endif
6514 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6515
6516 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6517 return off;
6518}
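/*
 * In plain C the emitted check corresponds to (sketch):
 *
 *     if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *         != (fExec & IEMTB_F_KEY_MASK))
 *         breakpoint();   // int3 / brk #0x2000
 */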
6519
6520#endif /* VBOX_STRICT */
6521
6522
6523#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6524/**
6525 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6526 */
6527DECL_HIDDEN_THROW(uint32_t)
6528iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6529{
6530 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6531
6532 fEflNeeded &= X86_EFL_STATUS_BITS;
6533 if (fEflNeeded)
6534 {
6535# ifdef RT_ARCH_AMD64
6536 /* test dword [pVCpu + offVCpu], imm32 */
6537 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6538 if (fEflNeeded <= 0xff)
6539 {
6540 pCodeBuf[off++] = 0xf6;
6541 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6542 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6543 }
6544 else
6545 {
6546 pCodeBuf[off++] = 0xf7;
6547 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6548 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6549 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6550 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6551 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6552 }
6553 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6554
6555# else
6556 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6557 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6558 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6559# ifdef RT_ARCH_ARM64
6560 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6561 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6562# else
6563# error "Port me!"
6564# endif
6565 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6566# endif
6567 }
6568 return off;
6569}
6570#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6571
6572
6573/**
6574 * Emits a code for checking the return code of a call and rcPassUp, returning
6575 * from the code if either are non-zero.
6576 */
6577DECL_HIDDEN_THROW(uint32_t)
6578iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6579{
6580#ifdef RT_ARCH_AMD64
6581 /*
6582 * AMD64: eax = call status code.
6583 */
6584
6585 /* edx = rcPassUp */
6586 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6587# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6588 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6589# endif
6590
6591 /* edx = eax | rcPassUp */
6592 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6593 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6594 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6595 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6596
6597 /* Jump to non-zero status return path. */
6598 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6599
6600 /* done. */
6601
6602#elif RT_ARCH_ARM64
6603 /*
6604 * ARM64: w0 = call status code.
6605 */
6606# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6607 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6608# endif
6609 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6610
6611 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6612
6613 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6614
6615 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6616 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6617 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6618
6619#else
6620# error "port me"
6621#endif
6622 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6623 RT_NOREF_PV(idxInstr);
6624 return off;
6625}
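/*
 * Pseudo-C for the status propagation above (sketch): the call status and the sticky
 * rcPassUp are OR'ed together and any non-zero result diverts to the
 * NonZeroRetOrPassUp label.
 *
 *     if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *         goto NonZeroRetOrPassUp;
 */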
6626
6627
6628/**
6629 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6630 * raising a \#GP(0) if it isn't.
6631 *
6632 * @returns New code buffer offset, UINT32_MAX on failure.
6633 * @param pReNative The native recompile state.
6634 * @param off The code buffer offset.
6635 * @param idxAddrReg The host register with the address to check.
6636 * @param idxInstr The current instruction.
6637 */
6638DECL_HIDDEN_THROW(uint32_t)
6639iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6640{
6641 /*
6642 * Make sure we don't have any outstanding guest register writes as we may
6643 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6644 */
6645 off = iemNativeRegFlushPendingWrites(pReNative, off);
6646
6647#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6648 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6649#else
6650 RT_NOREF(idxInstr);
6651#endif
6652
6653#ifdef RT_ARCH_AMD64
6654 /*
6655 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6656 * return raisexcpt();
6657 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6658 */
6659 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6660
6661 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6662 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6663 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6664 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6665 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6666
6667 iemNativeRegFreeTmp(pReNative, iTmpReg);
6668
6669#elif defined(RT_ARCH_ARM64)
6670 /*
6671 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6672 * return raisexcpt();
6673 * ----
6674 * mov x1, 0x800000000000
6675 * add x1, x0, x1
6676 * cmp xzr, x1, lsr 48
6677 * b.ne .Lraisexcpt
6678 */
6679 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6680
6681 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6682 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6683 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6684 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6685
6686 iemNativeRegFreeTmp(pReNative, iTmpReg);
6687
6688#else
6689# error "Port me"
6690#endif
6691 return off;
6692}
6693
6694
6695/**
6696 * Emits code to check that the content of @a idxAddrReg is within the limit
6697 * of CS, raising a \#GP(0) if it isn't.
6698 *
6699 * @returns New code buffer offset; throws VBox status code on error.
6700 * @param pReNative The native recompile state.
6701 * @param off The code buffer offset.
6702 * @param idxAddrReg The host register (32-bit) with the address to
6703 * check.
6704 * @param idxInstr The current instruction.
6705 */
6706DECL_HIDDEN_THROW(uint32_t)
6707iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6708 uint8_t idxAddrReg, uint8_t idxInstr)
6709{
6710 /*
6711 * Make sure we don't have any outstanding guest register writes as we may
6712 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6713 */
6714 off = iemNativeRegFlushPendingWrites(pReNative, off);
6715
6716#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6717 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6718#else
6719 RT_NOREF(idxInstr);
6720#endif
6721
6722 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6723 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6724 kIemNativeGstRegUse_ReadOnly);
6725
6726 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6727 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6728
6729 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6730 return off;
6731}
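/*
 * Sketch of the emitted check in C terms: an unsigned 'above' comparison against the
 * CS limit shadow register, branching to the RaiseGp0 label on failure (uAddr and
 * uCsLimit are placeholders for the register contents).
 *
 *     if ((uint32_t)uAddr > uCsLimit)    // idxRegCsLim holds the CS limit from CPUMCTX
 *         goto RaiseGp0;
 */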
6732
6733
6734/**
6735 * Emits a call to a CImpl function or something similar.
6736 */
6737DECL_HIDDEN_THROW(uint32_t)
6738iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6739 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6740{
6741 /* Writeback everything. */
6742 off = iemNativeRegFlushPendingWrites(pReNative, off);
6743
6744 /*
6745 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6746 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6747 */
6748 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6749 fGstShwFlush
6750 | RT_BIT_64(kIemNativeGstReg_Pc)
6751 | RT_BIT_64(kIemNativeGstReg_EFlags));
6752 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6753
6754 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6755
6756 /*
6757 * Load the parameters.
6758 */
6759#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6760 /* Special case: the hidden VBOXSTRICTRC pointer. */
6761 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6762 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6763 if (cAddParams > 0)
6764 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6765 if (cAddParams > 1)
6766 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6767 if (cAddParams > 2)
6768 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6769 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6770
6771#else
6772 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6773 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6774 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6775 if (cAddParams > 0)
6776 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6777 if (cAddParams > 1)
6778 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6779 if (cAddParams > 2)
6780# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6781 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6782# else
6783 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6784# endif
6785#endif
6786
6787 /*
6788 * Make the call.
6789 */
6790 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6791
6792#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6793 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6794#endif
6795
6796 /*
6797 * Check the status code.
6798 */
6799 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6800}
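/*
 * Net effect of the emitted sequence (sketch): after flushing and writing back the
 * guest state, the generated code performs the equivalent of
 *
 *     rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);
 *
 * with only the first cAddParams extra parameters actually loaded, followed by the
 * shared return-code / rcPassUp check.
 */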
6801
6802
6803/**
6804 * Emits a call to a threaded worker function.
6805 */
6806DECL_HIDDEN_THROW(uint32_t)
6807iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6808{
6809 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6810
6811 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6812 off = iemNativeRegFlushPendingWrites(pReNative, off);
6813
6814 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6815 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6816
6817#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6818 /* The threaded function may throw / long jmp, so set current instruction
6819 number if we're counting. */
6820 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6821#endif
6822
6823 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6824
6825#ifdef RT_ARCH_AMD64
6826 /* Load the parameters and emit the call. */
6827# ifdef RT_OS_WINDOWS
6828# ifndef VBOXSTRICTRC_STRICT_ENABLED
6829 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6830 if (cParams > 0)
6831 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6832 if (cParams > 1)
6833 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6834 if (cParams > 2)
6835 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6836# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6837 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6838 if (cParams > 0)
6839 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6840 if (cParams > 1)
6841 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6842 if (cParams > 2)
6843 {
6844 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6845 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6846 }
6847 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6848# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6849# else
6850 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6851 if (cParams > 0)
6852 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6853 if (cParams > 1)
6854 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6855 if (cParams > 2)
6856 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6857# endif
6858
6859 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6860
6861# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6862 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6863# endif
6864
6865#elif RT_ARCH_ARM64
6866 /*
6867 * ARM64:
6868 */
6869 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6870 if (cParams > 0)
6871 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6872 if (cParams > 1)
6873 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6874 if (cParams > 2)
6875 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6876
6877 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6878
6879#else
6880# error "port me"
6881#endif
6882
6883 /*
6884 * Check the status code.
6885 */
6886 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6887
6888 return off;
6889}
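/*
 * Sketch of the generated call: the threaded worker is invoked as
 *
 *     rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
 *                    pCallEntry->auParams[0], pCallEntry->auParams[1], pCallEntry->auParams[2]);
 *
 * with unused trailing parameters simply not loaded, followed by the common status
 * code check (iemNativeEmitCheckCallRetAndPassUp).
 */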
6890
6891#ifdef VBOX_WITH_STATISTICS
6892/**
6893 * Emits code to update the thread call statistics.
6894 */
6895DECL_INLINE_THROW(uint32_t)
6896iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6897{
6898 /*
6899 * Update threaded function stats.
6900 */
6901 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6902 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6903# if defined(RT_ARCH_ARM64)
6904 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6905 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6906 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6907 iemNativeRegFreeTmp(pReNative, idxTmp1);
6908 iemNativeRegFreeTmp(pReNative, idxTmp2);
6909# else
6910 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6911# endif
6912 return off;
6913}
6914#endif /* VBOX_WITH_STATISTICS */
6915
6916
6917/**
6918 * Emits the code at the ReturnWithFlags label (returns
6919 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6920 */
6921static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6922{
6923 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6924 if (idxLabel != UINT32_MAX)
6925 {
6926 iemNativeLabelDefine(pReNative, idxLabel, off);
6927
6928 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6929
6930 /* jump back to the return sequence. */
6931 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6932 }
6933 return off;
6934}
6935
6936
6937/**
6938 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6939 */
6940static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6941{
6942 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6943 if (idxLabel != UINT32_MAX)
6944 {
6945 iemNativeLabelDefine(pReNative, idxLabel, off);
6946
6947 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6948
6949 /* jump back to the return sequence. */
6950 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6951 }
6952 return off;
6953}
6954
6955
6956/**
6957 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6958 */
6959static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6960{
6961 /*
6962 * Generate the rc + rcPassUp fiddling code if needed.
6963 */
6964 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6965 if (idxLabel != UINT32_MAX)
6966 {
6967 iemNativeLabelDefine(pReNative, idxLabel, off);
6968
6969 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6970#ifdef RT_ARCH_AMD64
6971# ifdef RT_OS_WINDOWS
6972# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6973 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6974# endif
6975 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6976 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6977# else
6978 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6979 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6980# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6981 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6982# endif
6983# endif
6984# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6985 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6986# endif
6987
6988#else
6989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6990 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6991 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6992#endif
6993
6994 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6995 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6996 }
6997 return off;
6998}
6999
7000
7001/**
7002 * Emits a standard epilog.
7003 */
7004static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7005{
7006 *pidxReturnLabel = UINT32_MAX;
7007
7008 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7009 off = iemNativeRegFlushPendingWrites(pReNative, off);
7010
7011 /*
7012 * Successful return, so clear the return register (eax, w0).
7013 */
7014 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7015
7016 /*
7017 * Define label for common return point.
7018 */
7019 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7020 *pidxReturnLabel = idxReturn;
7021
7022 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7023
7024 /*
7025 * Restore registers and return.
7026 */
7027#ifdef RT_ARCH_AMD64
7028 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7029
7030 /* Reposition esp at the r15 restore point. */
7031 pbCodeBuf[off++] = X86_OP_REX_W;
7032 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7033 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7034 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7035
7036 /* Pop non-volatile registers and return */
7037 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7038 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7039 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7040 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7041 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7042 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7043 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7044 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7045# ifdef RT_OS_WINDOWS
7046 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7047 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7048# endif
7049 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7050 pbCodeBuf[off++] = 0xc9; /* leave */
7051 pbCodeBuf[off++] = 0xc3; /* ret */
7052 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7053
7054#elif RT_ARCH_ARM64
7055 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7056
7057 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7058 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7059 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7060 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7061 IEMNATIVE_FRAME_VAR_SIZE / 8);
7062 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7063 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7064 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7065 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7066 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7067 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7068 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7069 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7070 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7071 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7072 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7073 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7074
7075 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7076 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7077 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7078 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7079
7080 /* retab / ret */
7081# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7082 if (1)
7083 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7084 else
7085# endif
7086 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7087
7088#else
7089# error "port me"
7090#endif
7091 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7092
7093 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7094}
7095
7096
7097/**
7098 * Emits a standard prolog.
7099 */
7100static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7101{
7102#ifdef RT_ARCH_AMD64
7103 /*
7104 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7105 * reserving 64 bytes for stack variables plus 4 non-register argument
7106 * slots. Fixed register assignment: xBX = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU).
7107 *
7108 * Since we always do the same register spilling, we can use the same
7109 * unwind description for all the code.
7110 */
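    /* Rough sketch of the frame this sets up (derived from the pushes and the
       stack adjustment below; exact offsets depend on the host ABI and the
       IEMNATIVE_FRAME_XXX defines, so treat this as an illustration only):
            [xBP+8]   return address
            [xBP+0]   saved xBP
            [xBP-8]   saved xBX
            (Windows hosts additionally: saved xSI, xDI)
            ...       saved r12..r15 (the r15 slot being IEMNATIVE_FP_OFF_LAST_PUSH)
            ...       alignment padding + variable area + stack/shadow argument slots
            [xSP]     bottom of the frame after the 'sub xSP' below */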
7111 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7112 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7113 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7114 pbCodeBuf[off++] = 0x8b;
7115 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7116 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7117 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7118# ifdef RT_OS_WINDOWS
7119 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7120 pbCodeBuf[off++] = 0x8b;
7121 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7122 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7123 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7124# else
7125 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7126 pbCodeBuf[off++] = 0x8b;
7127 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7128# endif
7129 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7130 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7131 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7132 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7133 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7134 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7135 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7136 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7137
7138# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7139 /* Save the frame pointer. */
7140 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7141# endif
7142
7143 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7144 X86_GREG_xSP,
7145 IEMNATIVE_FRAME_ALIGN_SIZE
7146 + IEMNATIVE_FRAME_VAR_SIZE
7147 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7148 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7149 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7150 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7151 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7152
7153#elif RT_ARCH_ARM64
7154 /*
7155 * We set up a stack frame exactly like on x86, only we have to push the
7156 * return address ourselves here. We save all non-volatile registers.
7157 */
7158 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7159
7160# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we've been unable
7161 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7162 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
7163 * in any way conditional, so we just emit this instruction now and hope for the best... */
7164 /* pacibsp */
7165 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7166# endif
7167
7168 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7169 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7170 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7171 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7172 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7173 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7174 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7175 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7176 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7177 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7178 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7179 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7180 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7181 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7182 /* Save the BP and LR (ret address) registers at the top of the frame. */
7183 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7184 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7185 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7186 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7187 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7188 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7189
7190 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7191 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7192
7193 /* mov r28, r0 */
7194 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7195 /* mov r27, r1 */
7196 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7197
7198# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7199 /* Save the frame pointer. */
7200 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7201 ARMV8_A64_REG_X2);
7202# endif
7203
7204#else
7205# error "port me"
7206#endif
7207 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7208 return off;
7209}
7210
7211
7212/*********************************************************************************************************************************
7213* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7214*********************************************************************************************************************************/
7215
7216/**
7217 * Internal work that allocates a variable with kind set to
7218 * kIemNativeVarKind_Invalid and no current stack allocation.
7219 *
7220 * The kind will either be set by the caller or later when the variable is first
7221 * assigned a value.
7222 *
7223 * @returns Unpacked index.
7224 * @internal
7225 */
7226static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7227{
7228 Assert(cbType > 0 && cbType <= 64);
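    /* Find the lowest clear bit in the variable bitmap; e.g. (illustration only)
       with bmVars=0x7 the first set bit of ~bmVars is bit 3 (1-based result 4),
       so idxVar ends up as 3. */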
7229 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7230 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7231 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7232 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7233 pReNative->Core.aVars[idxVar].cbVar = cbType;
7234 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7235 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7236 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7237 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7238 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7239 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7240 pReNative->Core.aVars[idxVar].u.uValue = 0;
7241#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7242 pReNative->Core.aVars[idxVar].fSimdReg = false;
7243#endif
7244 return idxVar;
7245}
7246
7247
7248/**
7249 * Internal work that allocates an argument variable w/o setting enmKind.
7250 *
7251 * @returns Unpacked index.
7252 * @internal
7253 */
7254static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7255{
7256 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7257 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7258 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7259
7260 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7261 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7262 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7263 return idxVar;
7264}
7265
7266
7267/**
7268 * Gets the stack slot for a stack variable, allocating one if necessary.
7269 *
7270 * Calling this function implies that the stack slot will contain a valid
7271 * variable value. The caller deals with any register currently assigned to the
7272 * variable, typically by spilling it into the stack slot.
7273 *
7274 * @returns The stack slot number.
7275 * @param pReNative The recompiler state.
7276 * @param idxVar The variable.
7277 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7278 */
7279DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7280{
7281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7282 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7283 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7284
7285 /* Already got a slot? */
7286 uint8_t const idxStackSlot = pVar->idxStackSlot;
7287 if (idxStackSlot != UINT8_MAX)
7288 {
7289 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7290 return idxStackSlot;
7291 }
7292
7293 /*
7294 * A single slot is easy to allocate.
7295 * Allocate them from the top end, closest to BP, to reduce the displacement.
7296 */
7297 if (pVar->cbVar <= sizeof(uint64_t))
7298 {
7299 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7300 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7301 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7302 pVar->idxStackSlot = (uint8_t)iSlot;
7303 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7304 return (uint8_t)iSlot;
7305 }
7306
7307 /*
7308 * We need more than one stack slot.
7309 *
7310 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7311 */
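    /* Illustration: a 32 byte variable needs four slots (fBitAllocMask=0xf) and
       must start at a 4-slot aligned index (fBitAlignMask=3), which is what the
       '& ~fBitAlignMask' rounding below ensures. */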
7312 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7313 Assert(pVar->cbVar <= 64);
7314 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7315 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7316 uint32_t bmStack = pReNative->Core.bmStack;
7317 while (bmStack != UINT32_MAX)
7318 {
7319 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7320 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7321 iSlot = (iSlot - 1) & ~fBitAlignMask;
7322 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7323 {
7324 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7325 pVar->idxStackSlot = (uint8_t)iSlot;
7326 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7327 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7328 return (uint8_t)iSlot;
7329 }
7330
7331 bmStack |= (fBitAllocMask << iSlot);
7332 }
7333 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7334}
7335
7336
7337/**
7338 * Changes the variable to a stack variable.
7339 *
7340 * Currently this is only possible to do the first time the variable is used;
7341 * switching later could be implemented but isn't done.
7342 *
7343 * @param pReNative The recompiler state.
7344 * @param idxVar The variable.
7345 * @throws VERR_IEM_VAR_IPE_2
7346 */
7347DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7348{
7349 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7350 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7351 if (pVar->enmKind != kIemNativeVarKind_Stack)
7352 {
7353 /* We could in theory transition from immediate to stack as well, but it
7354 would involve the caller doing work storing the value on the stack. So,
7355 till that's required we only allow transition from invalid. */
7356 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7357 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7358 pVar->enmKind = kIemNativeVarKind_Stack;
7359
7360 /* Note! We don't allocate a stack slot here, that's only done when a
7361 slot is actually needed to hold a variable value. */
7362 }
7363}
7364
7365
7366/**
7367 * Sets it to a variable with a constant value.
7368 *
7369 * This does not require stack storage as we know the value and can always
7370 * reload it, unless of course it's referenced.
7371 *
7372 * @param pReNative The recompiler state.
7373 * @param idxVar The variable.
7374 * @param uValue The immediate value.
7375 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7376 */
7377DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7378{
7379 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7380 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7381 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7382 {
7383 /* Only simple transitions for now. */
7384 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7385 pVar->enmKind = kIemNativeVarKind_Immediate;
7386 }
7387 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7388
7389 pVar->u.uValue = uValue;
7390 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7391 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7392 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7393}
7394
7395
7396/**
7397 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7398 *
7399 * This does not require stack storage as we know the value and can always
7400 * reload it. Loading is postponed till needed.
7401 *
7402 * @param pReNative The recompiler state.
7403 * @param idxVar The variable. Unpacked.
7404 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7405 *
7406 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7407 * @internal
7408 */
7409static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7410{
7411 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7412 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7413
7414 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7415 {
7416 /* Only simple transitions for now. */
7417 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7418 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7419 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7420 }
7421 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7422
7423 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7424
7425 /* Update the other variable, ensure it's a stack variable. */
7426 /** @todo handle variables with const values... that'll go boom now. */
7427 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7428 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7429}
7430
7431
7432/**
7433 * Sets the variable to a reference (pointer) to a guest register reference.
7434 *
7435 * This does not require stack storage as we know the value and can always
7436 * reload it. Loading is postponed till needed.
7437 *
7438 * @param pReNative The recompiler state.
7439 * @param idxVar The variable.
7440 * @param enmRegClass The class guest registers to reference.
7441 * @param idxReg The register within @a enmRegClass to reference.
7442 *
7443 * @throws VERR_IEM_VAR_IPE_2
7444 */
7445DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7446 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7447{
7448 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7449 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7450
7451 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7452 {
7453 /* Only simple transitions for now. */
7454 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7455 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7456 }
7457 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7458
7459 pVar->u.GstRegRef.enmClass = enmRegClass;
7460 pVar->u.GstRegRef.idx = idxReg;
7461}
7462
7463
7464DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7465{
7466 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7467}
7468
7469
7470DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7471{
7472 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7473
7474 /* Since we're using a generic uint64_t value type, we must truncate it if
7475 the variable is smaller, otherwise we may end up with a too large value when
7476 scaling up an imm8 w/ sign-extension.
7477
7478 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7479 in the bios, bx=1) when running on arm, because clang expects 16-bit
7480 register parameters to have bits 16 and up set to zero. Instead of
7481 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7482 CF value in the result. */
7483 switch (cbType)
7484 {
7485 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7486 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7487 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7488 }
7489 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7490 return idxVar;
7491}
7492
7493
7494DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7495{
7496 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7497 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7498 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7499 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7500 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7501 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7502
7503 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7504 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7505 return idxArgVar;
7506}
7507
7508
7509DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7510{
7511 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7512 /* Don't set to stack now, leave that to the first use as for instance
7513 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7514 return idxVar;
7515}
7516
7517
7518DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7519{
7520 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7521
7522 /* Since we're using a generic uint64_t value type, we must truncate it if
7523 the variable is smaller, otherwise we may end up with a too large value when
7524 scaling up an imm8 w/ sign-extension. */
7525 switch (cbType)
7526 {
7527 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7528 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7529 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7530 }
7531 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7532 return idxVar;
7533}
7534
7535
7536/**
7537 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7538 * fixed till we call iemNativeVarRegisterRelease.
7539 *
7540 * @returns The host register number.
7541 * @param pReNative The recompiler state.
7542 * @param idxVar The variable.
7543 * @param poff Pointer to the instruction buffer offset.
7544 * In case a register needs to be freed up or the value
7545 * loaded off the stack.
7546 * @param fInitialized Set if the variable must already have been initialized.
7547 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7548 * the case.
7549 * @param idxRegPref Preferred register number or UINT8_MAX.
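 *
 * @note    Rough usage sketch, for illustration only (the middle line stands in
 *          for whatever code actually uses the register; iemNativeVarRegisterRelease
 *          is the counterpart mentioned above and its exact signature is assumed):
 *              uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
 *              off = ...emit code that reads/writes idxReg...;
 *              iemNativeVarRegisterRelease(pReNative, idxVar);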
7550 */
7551DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7552 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7553{
7554 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7555 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7556 Assert(pVar->cbVar <= 8);
7557 Assert(!pVar->fRegAcquired);
7558
7559 uint8_t idxReg = pVar->idxReg;
7560 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7561 {
7562 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7563 && pVar->enmKind < kIemNativeVarKind_End);
7564 pVar->fRegAcquired = true;
7565 return idxReg;
7566 }
7567
7568 /*
7569 * If the kind of variable has not yet been set, default to 'stack'.
7570 */
7571 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7572 && pVar->enmKind < kIemNativeVarKind_End);
7573 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7574 iemNativeVarSetKindToStack(pReNative, idxVar);
7575
7576 /*
7577 * We have to allocate a register for the variable, even if it's a stack one,
7578 * as we don't know whether there are modifications being made to it before it's
7579 * finalized (todo: analyze and insert hints about that?).
7580 *
7581 * If we can, we try to get the correct register for argument variables. This
7582 * assumes that most argument variables are fetched as close as possible
7583 * to the actual call, so that there aren't any interfering hidden calls
7584 * (memory accesses, etc.) in between.
7585 *
7586 * If we cannot, or it's a local variable, we make sure no argument registers
7587 * that will be used by this MC block are allocated here, and we always
7588 * prefer non-volatile registers to avoid having to spill stuff for internal
7589 * calls.
7590 */
7591 /** @todo Detect too early argument value fetches in the python script and warn
7592 * about hidden calls causing less optimal code to be generated. */
7593
7594 uint8_t const uArgNo = pVar->uArgNo;
7595 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7596 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7597 {
7598 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7599 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7600 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7601 }
7602 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7603 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7604 {
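        /* Build the set of candidate host GPRs: currently unallocated, not shadowing
           any guest register, not fixed, and not among the argument registers this
           MC block will need for its calls. */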
7605 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7606 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7607 & ~pReNative->Core.bmHstRegsWithGstShadow
7608 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7609 & fNotArgsMask;
7610 if (fRegs)
7611 {
7612 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7613 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7614 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7615 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7616 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7617 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7618 }
7619 else
7620 {
7621 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7622 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7623 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7624 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7625 }
7626 }
7627 else
7628 {
7629 idxReg = idxRegPref;
7630 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7631 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7632 }
7633 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7634 pVar->idxReg = idxReg;
7635
7636#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7637 pVar->fSimdReg = false;
7638#endif
7639
7640 /*
7641 * Load it off the stack if we've got a stack slot.
7642 */
7643 uint8_t const idxStackSlot = pVar->idxStackSlot;
7644 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7645 {
7646 Assert(fInitialized);
7647 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7648 switch (pVar->cbVar)
7649 {
7650 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7651 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7652 case 3: AssertFailed(); RT_FALL_THRU();
7653 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7654 default: AssertFailed(); RT_FALL_THRU();
7655 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7656 }
7657 }
7658 else
7659 {
7660 Assert(idxStackSlot == UINT8_MAX);
7661 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7662 }
7663 pVar->fRegAcquired = true;
7664 return idxReg;
7665}
7666
7667
7668#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7669/**
7670 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7671 * fixed till we call iemNativeVarRegisterRelease.
7672 *
7673 * @returns The host register number.
7674 * @param pReNative The recompiler state.
7675 * @param idxVar The variable.
7676 * @param poff Pointer to the instruction buffer offset.
7677 * In case a register needs to be freed up or the value
7678 * loaded off the stack.
7679 * @param fInitialized Set if the variable must already have been initialized.
7680 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7681 * the case.
7682 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7683 */
7684DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7685 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7686{
7687 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7688 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7689 Assert( pVar->cbVar == sizeof(RTUINT128U)
7690 || pVar->cbVar == sizeof(RTUINT256U));
7691 Assert(!pVar->fRegAcquired);
7692
7693 uint8_t idxReg = pVar->idxReg;
7694 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7695 {
7696 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7697 && pVar->enmKind < kIemNativeVarKind_End);
7698 pVar->fRegAcquired = true;
7699 return idxReg;
7700 }
7701
7702 /*
7703 * If the kind of variable has not yet been set, default to 'stack'.
7704 */
7705 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7706 && pVar->enmKind < kIemNativeVarKind_End);
7707 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7708 iemNativeVarSetKindToStack(pReNative, idxVar);
7709
7710 /*
7711 * We have to allocate a register for the variable, even if it's a stack one,
7712 * as we don't know whether there are modifications being made to it before it's
7713 * finalized (todo: analyze and insert hints about that?).
7714 *
7715 * If we can, we try to get the correct register for argument variables. This
7716 * assumes that most argument variables are fetched as close as possible
7717 * to the actual call, so that there aren't any interfering hidden calls
7718 * (memory accesses, etc.) in between.
7719 *
7720 * If we cannot, or it's a local variable, we make sure no argument registers
7721 * that will be used by this MC block are allocated here, and we always
7722 * prefer non-volatile registers to avoid having to spill stuff for internal
7723 * calls.
7724 */
7725 /** @todo Detect too early argument value fetches in the python script and warn
7726 * about hidden calls causing less optimal code to be generated. */
7727
7728 uint8_t const uArgNo = pVar->uArgNo;
7729 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7730
7731 /* SIMD is a bit simpler for now because there is no support for arguments. */
7732 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7733 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7734 {
7735 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7736 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7737 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7738 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7739 & fNotArgsMask;
7740 if (fRegs)
7741 {
7742 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7743 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7744 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7745 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7746 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7747 }
7748 else
7749 {
7750 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7751 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7752 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7753 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7754 }
7755 }
7756 else
7757 {
7758 idxReg = idxRegPref;
7759 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7760 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7761 }
7762 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7763
7764 pVar->fSimdReg = true;
7765 pVar->idxReg = idxReg;
7766
7767 /*
7768 * Load it off the stack if we've got a stack slot.
7769 */
7770 uint8_t const idxStackSlot = pVar->idxStackSlot;
7771 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7772 {
7773 Assert(fInitialized);
7774 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7775 switch (pVar->cbVar)
7776 {
7777 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7778 default: AssertFailed(); RT_FALL_THRU();
7779 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7780 }
7781 }
7782 else
7783 {
7784 Assert(idxStackSlot == UINT8_MAX);
7785 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7786 }
7787 pVar->fRegAcquired = true;
7788 return idxReg;
7789}
7790#endif
7791
7792
7793/**
7794 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7795 * guest register.
7796 *
7797 * This function makes sure there is a register for it and sets it to be the
7798 * current shadow copy of @a enmGstReg.
7799 *
7800 * @returns The host register number.
7801 * @param pReNative The recompiler state.
7802 * @param idxVar The variable.
7803 * @param enmGstReg The guest register this variable will be written to
7804 * after this call.
7805 * @param poff Pointer to the instruction buffer offset.
7806 * In case a register needs to be freed up or if the
7807 * variable content needs to be loaded off the stack.
7808 *
7809 * @note We DO NOT expect @a idxVar to be an argument variable,
7810 * because this function is only used in the commit stage of an
7811 * instruction.
7812 */
7813DECL_HIDDEN_THROW(uint8_t)
7814iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7815{
7816 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7817 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7818 Assert(!pVar->fRegAcquired);
7819 AssertMsgStmt( pVar->cbVar <= 8
7820 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7821 || pVar->enmKind == kIemNativeVarKind_Stack),
7822 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7823 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7824 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7825
7826 /*
7827 * This shouldn't ever be used for arguments, unless it's in a weird else
7828 * branch that doesn't do any calling and even then it's questionable.
7829 *
7830 * However, in case someone writes crazy wrong MC code and does register
7831 * updates before making calls, just use the regular register allocator to
7832 * ensure we get a register suitable for the intended argument number.
7833 */
7834 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7835
7836 /*
7837 * If there is already a register for the variable, we transfer/set the
7838 * guest shadow copy assignment to it.
7839 */
7840 uint8_t idxReg = pVar->idxReg;
7841 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7842 {
7843 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7844 {
7845 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7846 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7847 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7848 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7849 }
7850 else
7851 {
7852 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7853 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7854 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7855 }
7856 /** @todo figure this one out. We need some way of making sure the register isn't
7857 * modified after this point, just in case we start writing crappy MC code. */
7858 pVar->enmGstReg = enmGstReg;
7859 pVar->fRegAcquired = true;
7860 return idxReg;
7861 }
7862 Assert(pVar->uArgNo == UINT8_MAX);
7863
7864 /*
7865 * Because this is supposed to be the commit stage, we just tag along with the
7866 * temporary register allocator and upgrade it to a variable register.
7867 */
7868 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7869 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7870 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7871 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7872 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7873 pVar->idxReg = idxReg;
7874
7875 /*
7876 * Now we need to load the register value.
7877 */
7878 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7879 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7880 else
7881 {
7882 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7883 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7884 switch (pVar->cbVar)
7885 {
7886 case sizeof(uint64_t):
7887 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7888 break;
7889 case sizeof(uint32_t):
7890 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7891 break;
7892 case sizeof(uint16_t):
7893 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7894 break;
7895 case sizeof(uint8_t):
7896 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7897 break;
7898 default:
7899 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7900 }
7901 }
7902
7903 pVar->fRegAcquired = true;
7904 return idxReg;
7905}
7906
7907
7908/**
7909 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7910 *
7911 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7912 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7913 * requirement of flushing anything in volatile host registers when making a
7914 * call.
7915 *
7916 * @returns New @a off value.
7917 * @param pReNative The recompiler state.
7918 * @param off The code buffer position.
7919 * @param fHstRegsNotToSave Set of registers not to save & restore.
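 *
 * @note    Illustrative pairing only (the actual argument loading and helper
 *          call emission differs per call site); it is meant to be matched by
 *          iemNativeVarRestoreVolatileRegsPostHlpCall as noted above:
 *              off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *              ... load argument registers and emit the helper call ...
 *              off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);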
7920 */
7921DECL_HIDDEN_THROW(uint32_t)
7922iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7923{
7924 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7925 if (fHstRegs)
7926 {
7927 do
7928 {
7929 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7930 fHstRegs &= ~RT_BIT_32(idxHstReg);
7931
7932 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7933 {
7934 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7935 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7936 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7937 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7938 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7939 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7940 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7941 {
7942 case kIemNativeVarKind_Stack:
7943 {
7944 /* Temporarily spill the variable register. */
7945 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7946 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7947 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7948 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7949 continue;
7950 }
7951
7952 case kIemNativeVarKind_Immediate:
7953 case kIemNativeVarKind_VarRef:
7954 case kIemNativeVarKind_GstRegRef:
7955 /* It is weird to have any of these loaded at this point. */
7956 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7957 continue;
7958
7959 case kIemNativeVarKind_End:
7960 case kIemNativeVarKind_Invalid:
7961 break;
7962 }
7963 AssertFailed();
7964 }
7965 else
7966 {
7967 /*
7968 * Allocate a temporary stack slot and spill the register to it.
7969 */
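                /* The chosen slot is recorded in aHstRegs[idxHstReg].idxStackSlot so that
                   iemNativeVarRestoreVolatileRegsPostHlpCall can reload the register from
                   it and free the slot again afterwards. */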
7970 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7971 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7972 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7973 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7974 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7975 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7976 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7977 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7978 }
7979 } while (fHstRegs);
7980 }
7981#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7982 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7983 if (fHstRegs)
7984 {
7985 do
7986 {
7987 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7988 fHstRegs &= ~RT_BIT_32(idxHstReg);
7989
7990 /*
7991 * Guest registers are flushed to CPUMCTX at the moment, so they don't need a stack slot
7992 * allocated, which would be more difficult anyway as they span multiple stack slots and come
7993 * in different sizes (besides, we only have a limited number of slots at the moment). Fixed
7994 * temporary registers don't need saving either.
7995 */
7996 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7997 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7998 continue;
7999
8000 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8001
8002 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8003 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8004 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8005 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8006 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8007 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8008 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8009 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8010 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8011 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8012 {
8013 case kIemNativeVarKind_Stack:
8014 {
8015 /* Temporarily spill the variable register. */
8016 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8017 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8018 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8019 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8020 if (cbVar == sizeof(RTUINT128U))
8021 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8022 else
8023 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8024 continue;
8025 }
8026
8027 case kIemNativeVarKind_Immediate:
8028 case kIemNativeVarKind_VarRef:
8029 case kIemNativeVarKind_GstRegRef:
8030 /* It is weird to have any of these loaded at this point. */
8031 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8032 continue;
8033
8034 case kIemNativeVarKind_End:
8035 case kIemNativeVarKind_Invalid:
8036 break;
8037 }
8038 AssertFailed();
8039 } while (fHstRegs);
8040 }
8041#endif
8042 return off;
8043}
8044
8045
8046/**
8047 * Emit code to restore volatile registers after a call to a helper.
8048 *
8049 * @returns New @a off value.
8050 * @param pReNative The recompiler state.
8051 * @param off The code buffer position.
8052 * @param fHstRegsNotToSave Set of registers not to save & restore.
8053 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8054 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8055 */
8056DECL_HIDDEN_THROW(uint32_t)
8057iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8058{
8059 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8060 if (fHstRegs)
8061 {
8062 do
8063 {
8064 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8065 fHstRegs &= ~RT_BIT_32(idxHstReg);
8066
8067 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8068 {
8069 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8071 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8072 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8073 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8074 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8075 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8076 {
8077 case kIemNativeVarKind_Stack:
8078 {
8079 /* Unspill the variable register. */
8080 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8081 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8082 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8083 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8084 continue;
8085 }
8086
8087 case kIemNativeVarKind_Immediate:
8088 case kIemNativeVarKind_VarRef:
8089 case kIemNativeVarKind_GstRegRef:
8090 /* It is weird to have any of these loaded at this point. */
8091 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8092 continue;
8093
8094 case kIemNativeVarKind_End:
8095 case kIemNativeVarKind_Invalid:
8096 break;
8097 }
8098 AssertFailed();
8099 }
8100 else
8101 {
8102 /*
8103 * Restore from temporary stack slot.
8104 */
8105 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8106 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8107 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8108 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8109
8110 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8111 }
8112 } while (fHstRegs);
8113 }
8114#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8115 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8116 if (fHstRegs)
8117 {
8118 do
8119 {
8120 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8121 fHstRegs &= ~RT_BIT_32(idxHstReg);
8122
8123 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8124 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8125 continue;
8126 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8127
8128 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8129 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8130 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8131 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8132 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8133 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8134 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8135 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8136 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8137 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8138 {
8139 case kIemNativeVarKind_Stack:
8140 {
8141 /* Unspill the variable register. */
8142 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8143 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8144 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8145 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8146
8147 if (cbVar == sizeof(RTUINT128U))
8148 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8149 else
8150 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8151 continue;
8152 }
8153
8154 case kIemNativeVarKind_Immediate:
8155 case kIemNativeVarKind_VarRef:
8156 case kIemNativeVarKind_GstRegRef:
8157 /* It is weird to have any of these loaded at this point. */
8158 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8159 continue;
8160
8161 case kIemNativeVarKind_End:
8162 case kIemNativeVarKind_Invalid:
8163 break;
8164 }
8165 AssertFailed();
8166 } while (fHstRegs);
8167 }
8168#endif
8169 return off;
8170}
8171
8172
8173/**
8174 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8175 *
8176 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8177 *
8178 * ASSUMES that @a idxVar is valid and unpacked.
8179 */
8180DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8181{
8182 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8183 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8184 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8185 {
8186 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8187 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8188 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
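    /* Example: cbVar=0x20 gives cSlots=4 and fAllocMask=0xf, mirroring the mask
       iemNativeVarGetStackSlot used when the slots were allocated. */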
8189 Assert(cSlots > 0);
8190 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8191 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8192 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8193 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8194 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8195 }
8196 else
8197 Assert(idxStackSlot == UINT8_MAX);
8198}
8199
8200
8201/**
8202 * Worker that frees a single variable.
8203 *
8204 * ASSUMES that @a idxVar is valid and unpacked.
8205 */
8206DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8207{
8208 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8209 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8210 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8211
8212 /* Free the host register first if any assigned. */
8213 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8214#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8215 if ( idxHstReg != UINT8_MAX
8216 && pReNative->Core.aVars[idxVar].fSimdReg)
8217 {
8218 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8219 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8220 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8221 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8222 }
8223 else
8224#endif
8225 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8226 {
8227 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8228 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8229 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8230 }
8231
8232 /* Free argument mapping. */
8233 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8234 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8235 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8236
8237 /* Free the stack slots. */
8238 iemNativeVarFreeStackSlots(pReNative, idxVar);
8239
8240 /* Free the actual variable. */
8241 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8242 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8243}
8244
8245
8246/**
8247 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8248 */
8249DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8250{
8251 while (bmVars != 0)
8252 {
8253 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8254 bmVars &= ~RT_BIT_32(idxVar);
8255
8256#if 1 /** @todo optimize by simplifying this later... */
8257 iemNativeVarFreeOneWorker(pReNative, idxVar);
8258#else
8259 /* Only need to free the host register, the rest is done as bulk updates below. */
8260 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8261 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8262 {
8263 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8264 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8265 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8266 }
8267#endif
8268 }
8269#if 0 /** @todo optimize by simplifying this later... */
8270 pReNative->Core.bmVars = 0;
8271 pReNative->Core.bmStack = 0;
8272 pReNative->Core.u64ArgVars = UINT64_MAX;
8273#endif
8274}
8275
8276
8277
8278/*********************************************************************************************************************************
8279* Emitters for IEM_MC_CALL_CIMPL_XXX *
8280*********************************************************************************************************************************/
8281
8282/**
8283 * Emits code to load a reference to the given guest register into @a idxGprDst.
8284 */
8285DECL_HIDDEN_THROW(uint32_t)
8286iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8287 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8288{
8289#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8290 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8291#endif
8292
8293 /*
8294 * Get the offset relative to the CPUMCTX structure.
8295 */
8296 uint32_t offCpumCtx;
8297 switch (enmClass)
8298 {
8299 case kIemNativeGstRegRef_Gpr:
8300 Assert(idxRegInClass < 16);
8301 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8302 break;
8303
8304 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8305 Assert(idxRegInClass < 4);
8306 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8307 break;
8308
8309 case kIemNativeGstRegRef_EFlags:
8310 Assert(idxRegInClass == 0);
8311 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8312 break;
8313
8314 case kIemNativeGstRegRef_MxCsr:
8315 Assert(idxRegInClass == 0);
8316 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8317 break;
8318
8319 case kIemNativeGstRegRef_FpuReg:
8320 Assert(idxRegInClass < 8);
8321 AssertFailed(); /** @todo what kind of indexing? */
8322 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8323 break;
8324
8325 case kIemNativeGstRegRef_MReg:
8326 Assert(idxRegInClass < 8);
8327 AssertFailed(); /** @todo what kind of indexing? */
8328 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8329 break;
8330
8331 case kIemNativeGstRegRef_XReg:
8332 Assert(idxRegInClass < 16);
8333 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8334 break;
8335
8336 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8337 Assert(idxRegInClass == 0);
8338 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8339 break;
8340
8341 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8342 Assert(idxRegInClass == 0);
8343 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8344 break;
8345
8346 default:
8347 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8348 }
8349
8350 /*
8351 * Load the value into the destination register.
8352 */
8353#ifdef RT_ARCH_AMD64
8354 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8355
8356#elif defined(RT_ARCH_ARM64)
8357 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8358 Assert(offCpumCtx < 4096);
8359 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8360
8361#else
8362# error "Port me!"
8363#endif
8364
8365 return off;
8366}
8367
8368
8369/**
8370 * Common code for CIMPL and AIMPL calls.
8371 *
 8372 * These are calls that use argument variables and such. They should not be
8373 * confused with internal calls required to implement an MC operation,
8374 * like a TLB load and similar.
8375 *
8376 * Upon return all that is left to do is to load any hidden arguments and
8377 * perform the call. All argument variables are freed.
8378 *
8379 * @returns New code buffer offset; throws VBox status code on error.
8380 * @param pReNative The native recompile state.
8381 * @param off The code buffer offset.
 8382 * @param   cArgs       The total number of arguments (includes hidden
8383 * count).
8384 * @param cHiddenArgs The number of hidden arguments. The hidden
8385 * arguments must not have any variable declared for
8386 * them, whereas all the regular arguments must
8387 * (tstIEMCheckMc ensures this).
8388 */
8389DECL_HIDDEN_THROW(uint32_t)
8390iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8391{
8392#ifdef VBOX_STRICT
8393 /*
8394 * Assert sanity.
8395 */
8396 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8397 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8398 for (unsigned i = 0; i < cHiddenArgs; i++)
8399 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8400 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8401 {
8402 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8403 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8404 }
8405 iemNativeRegAssertSanity(pReNative);
8406#endif
8407
8408 /* We don't know what the called function makes use of, so flush any pending register writes. */
8409 off = iemNativeRegFlushPendingWrites(pReNative, off);
8410
8411 /*
8412 * Before we do anything else, go over variables that are referenced and
8413 * make sure they are not in a register.
8414 */
8415 uint32_t bmVars = pReNative->Core.bmVars;
8416 if (bmVars)
8417 {
8418 do
8419 {
8420 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8421 bmVars &= ~RT_BIT_32(idxVar);
8422
8423 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8424 {
8425 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8426#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8427 if ( idxRegOld != UINT8_MAX
8428 && pReNative->Core.aVars[idxVar].fSimdReg)
8429 {
8430 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8431 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8432
8433 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8434 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8435 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8436 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8437 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8438 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8439 else
8440 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8441
8442 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8443 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8444
8445 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8446 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8447 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8448 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8449 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8450 }
8451 else
8452#endif
8453 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8454 {
8455 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8456 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8457 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8458 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8459 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8460
8461 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8462 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8463 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8464 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8465 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8466 }
8467 }
8468 } while (bmVars != 0);
8469#if 0 //def VBOX_STRICT
8470 iemNativeRegAssertSanity(pReNative);
8471#endif
8472 }
8473
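    /* Number of arguments that get passed in host registers; any remaining ones
       go onto the stack (only possible when IEMNATIVE_FP_OFF_STACK_ARG0 is defined). */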
8474 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8475
8476 /*
8477 * First, go over the host registers that will be used for arguments and make
8478 * sure they either hold the desired argument or are free.
8479 */
8480 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8481 {
8482 for (uint32_t i = 0; i < cRegArgs; i++)
8483 {
8484 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8485 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8486 {
8487 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8488 {
8489 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8491 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8492 Assert(pVar->idxReg == idxArgReg);
8493 uint8_t const uArgNo = pVar->uArgNo;
8494 if (uArgNo == i)
 8495 { /* perfect */ }
8496 /* The variable allocator logic should make sure this is impossible,
8497 except for when the return register is used as a parameter (ARM,
8498 but not x86). */
8499#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8500 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8501 {
8502# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8503# error "Implement this"
8504# endif
8505 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8506 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8507 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8508 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8509 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8510 }
8511#endif
8512 else
8513 {
8514 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8515
8516 if (pVar->enmKind == kIemNativeVarKind_Stack)
8517 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8518 else
8519 {
8520 /* just free it, can be reloaded if used again */
8521 pVar->idxReg = UINT8_MAX;
8522 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8523 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8524 }
8525 }
8526 }
8527 else
8528 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8529 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8530 }
8531 }
8532#if 0 //def VBOX_STRICT
8533 iemNativeRegAssertSanity(pReNative);
8534#endif
8535 }
8536
8537 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8538
8539#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8540 /*
8541 * If there are any stack arguments, make sure they are in their place as well.
8542 *
 8543 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
 8544 * the caller) will be loading it later and it must be free (see the first loop).
8545 */
8546 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8547 {
8548 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8549 {
8550 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8551 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8552 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8553 {
8554 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8555 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8556 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8557 pVar->idxReg = UINT8_MAX;
8558 }
8559 else
8560 {
8561 /* Use ARG0 as temp for stuff we need registers for. */
8562 switch (pVar->enmKind)
8563 {
8564 case kIemNativeVarKind_Stack:
8565 {
8566 uint8_t const idxStackSlot = pVar->idxStackSlot;
8567 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8568 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8569 iemNativeStackCalcBpDisp(idxStackSlot));
8570 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8571 continue;
8572 }
8573
8574 case kIemNativeVarKind_Immediate:
8575 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8576 continue;
8577
8578 case kIemNativeVarKind_VarRef:
8579 {
8580 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8581 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8582 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8583 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8584 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8585# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8586 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8587 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8588 if ( fSimdReg
8589 && idxRegOther != UINT8_MAX)
8590 {
8591 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8592 if (cbVar == sizeof(RTUINT128U))
8593 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8594 else
8595 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8596 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8597 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8598 }
8599 else
8600# endif
8601 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8602 {
8603 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8604 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8605 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8606 }
8607 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8608 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8609 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8610 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8611 continue;
8612 }
8613
8614 case kIemNativeVarKind_GstRegRef:
8615 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8616 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8617 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8618 continue;
8619
8620 case kIemNativeVarKind_Invalid:
8621 case kIemNativeVarKind_End:
8622 break;
8623 }
8624 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8625 }
8626 }
8627# if 0 //def VBOX_STRICT
8628 iemNativeRegAssertSanity(pReNative);
8629# endif
8630 }
8631#else
8632 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8633#endif
8634
8635 /*
8636 * Make sure the argument variables are loaded into their respective registers.
8637 *
8638 * We can optimize this by ASSUMING that any register allocations are for
 8639 * registers that have already been loaded and are ready. The previous step
8640 * saw to that.
8641 */
8642 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8643 {
8644 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8645 {
8646 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8647 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8648 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8649 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8650 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8651 else
8652 {
8653 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8654 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8655 {
8656 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8657 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8658 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8659 | RT_BIT_32(idxArgReg);
8660 pVar->idxReg = idxArgReg;
8661 }
8662 else
8663 {
8664 /* Use ARG0 as temp for stuff we need registers for. */
8665 switch (pVar->enmKind)
8666 {
8667 case kIemNativeVarKind_Stack:
8668 {
8669 uint8_t const idxStackSlot = pVar->idxStackSlot;
8670 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8671 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8672 continue;
8673 }
8674
8675 case kIemNativeVarKind_Immediate:
8676 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8677 continue;
8678
8679 case kIemNativeVarKind_VarRef:
8680 {
8681 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8682 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8683 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8684 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8685 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8686 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8687#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8688 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8689 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8690 if ( fSimdReg
8691 && idxRegOther != UINT8_MAX)
8692 {
8693 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8694 if (cbVar == sizeof(RTUINT128U))
8695 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8696 else
8697 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8698 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8699 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8700 }
8701 else
8702#endif
8703 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8704 {
8705 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8706 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8707 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8708 }
8709 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8710 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8711 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8712 continue;
8713 }
8714
8715 case kIemNativeVarKind_GstRegRef:
8716 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8717 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8718 continue;
8719
8720 case kIemNativeVarKind_Invalid:
8721 case kIemNativeVarKind_End:
8722 break;
8723 }
8724 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8725 }
8726 }
8727 }
8728#if 0 //def VBOX_STRICT
8729 iemNativeRegAssertSanity(pReNative);
8730#endif
8731 }
8732#ifdef VBOX_STRICT
8733 else
8734 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8735 {
8736 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8737 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8738 }
8739#endif
8740
8741 /*
8742 * Free all argument variables (simplified).
8743 * Their lifetime always expires with the call they are for.
8744 */
8745 /** @todo Make the python script check that arguments aren't used after
8746 * IEM_MC_CALL_XXXX. */
 8747 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends, which require
 8748 *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
 8749 *        an argument value. There is also some FPU stuff. */
8750 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8751 {
8752 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8753 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8754
8755 /* no need to free registers: */
8756 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8757 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8758 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8759 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8760 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8761 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8762
8763 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8764 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8765 iemNativeVarFreeStackSlots(pReNative, idxVar);
8766 }
8767 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8768
8769 /*
8770 * Flush volatile registers as we make the call.
8771 */
8772 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8773
8774 return off;
8775}
8776
8777
8778
8779/*********************************************************************************************************************************
8780* TLB Lookup. *
8781*********************************************************************************************************************************/
8782
8783/**
8784 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8785 */
8786DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8787{
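    /* uSegAndSizeAndAccess packs three parameters: byte 0 holds the segment
       register index (UINT8_MAX when GCPtr is already flat), byte 1 the access
       size in bytes, and the upper 16 bits the IEM_ACCESS_XXX flags. */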
8788 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8789 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8790 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8791 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8792
8793 /* Do the lookup manually. */
8794 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8795 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8796 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8797 if (RT_LIKELY(pTlbe->uTag == uTag))
8798 {
8799 /*
8800 * Check TLB page table level access flags.
8801 */
8802 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8803 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8804 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8805 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8806 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8807 | IEMTLBE_F_PG_UNASSIGNED
8808 | IEMTLBE_F_PT_NO_ACCESSED
8809 | fNoWriteNoDirty | fNoUser);
8810 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
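        /* The IEMTLBE_F_XXX bits collected above are all zero in uTlbPhysRev, so a
           single equality compare validates the physical revision and at the same
           time rejects any disallowed access/paging attribute. */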
8811 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8812 {
8813 /*
8814 * Return the address.
8815 */
8816 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8817 if ((uintptr_t)pbAddr == uResult)
8818 return;
8819 RT_NOREF(cbMem);
8820 AssertFailed();
8821 }
8822 else
8823 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8824 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8825 }
8826 else
8827 AssertFailed();
8828 RT_BREAKPOINT();
8829}
8830
8831/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8832
8833
8834
8835/*********************************************************************************************************************************
8836* Recompiler Core. *
8837*********************************************************************************************************************************/
8838
8839/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8840static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8841{
8842 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8843 pDis->cbCachedInstr += cbMaxRead;
8844 RT_NOREF(cbMinRead);
8845 return VERR_NO_DATA;
8846}
8847
8848
8849DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8850{
8851 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8852 {
8853#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8854 ENTRY(fLocalForcedActions),
8855 ENTRY(iem.s.rcPassUp),
8856 ENTRY(iem.s.fExec),
8857 ENTRY(iem.s.pbInstrBuf),
8858 ENTRY(iem.s.uInstrBufPc),
8859 ENTRY(iem.s.GCPhysInstrBuf),
8860 ENTRY(iem.s.cbInstrBufTotal),
8861 ENTRY(iem.s.idxTbCurInstr),
8862#ifdef VBOX_WITH_STATISTICS
8863 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8864 ENTRY(iem.s.StatNativeTlbHitsForStore),
8865 ENTRY(iem.s.StatNativeTlbHitsForStack),
8866 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8867 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8868 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8869 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8870 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8871#endif
8872 ENTRY(iem.s.DataTlb.aEntries),
8873 ENTRY(iem.s.DataTlb.uTlbRevision),
8874 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8875 ENTRY(iem.s.DataTlb.cTlbHits),
8876 ENTRY(iem.s.CodeTlb.aEntries),
8877 ENTRY(iem.s.CodeTlb.uTlbRevision),
8878 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8879 ENTRY(iem.s.CodeTlb.cTlbHits),
8880 ENTRY(pVMR3),
8881 ENTRY(cpum.GstCtx.rax),
8882 ENTRY(cpum.GstCtx.ah),
8883 ENTRY(cpum.GstCtx.rcx),
8884 ENTRY(cpum.GstCtx.ch),
8885 ENTRY(cpum.GstCtx.rdx),
8886 ENTRY(cpum.GstCtx.dh),
8887 ENTRY(cpum.GstCtx.rbx),
8888 ENTRY(cpum.GstCtx.bh),
8889 ENTRY(cpum.GstCtx.rsp),
8890 ENTRY(cpum.GstCtx.rbp),
8891 ENTRY(cpum.GstCtx.rsi),
8892 ENTRY(cpum.GstCtx.rdi),
8893 ENTRY(cpum.GstCtx.r8),
8894 ENTRY(cpum.GstCtx.r9),
8895 ENTRY(cpum.GstCtx.r10),
8896 ENTRY(cpum.GstCtx.r11),
8897 ENTRY(cpum.GstCtx.r12),
8898 ENTRY(cpum.GstCtx.r13),
8899 ENTRY(cpum.GstCtx.r14),
8900 ENTRY(cpum.GstCtx.r15),
8901 ENTRY(cpum.GstCtx.es.Sel),
8902 ENTRY(cpum.GstCtx.es.u64Base),
8903 ENTRY(cpum.GstCtx.es.u32Limit),
8904 ENTRY(cpum.GstCtx.es.Attr),
8905 ENTRY(cpum.GstCtx.cs.Sel),
8906 ENTRY(cpum.GstCtx.cs.u64Base),
8907 ENTRY(cpum.GstCtx.cs.u32Limit),
8908 ENTRY(cpum.GstCtx.cs.Attr),
8909 ENTRY(cpum.GstCtx.ss.Sel),
8910 ENTRY(cpum.GstCtx.ss.u64Base),
8911 ENTRY(cpum.GstCtx.ss.u32Limit),
8912 ENTRY(cpum.GstCtx.ss.Attr),
8913 ENTRY(cpum.GstCtx.ds.Sel),
8914 ENTRY(cpum.GstCtx.ds.u64Base),
8915 ENTRY(cpum.GstCtx.ds.u32Limit),
8916 ENTRY(cpum.GstCtx.ds.Attr),
8917 ENTRY(cpum.GstCtx.fs.Sel),
8918 ENTRY(cpum.GstCtx.fs.u64Base),
8919 ENTRY(cpum.GstCtx.fs.u32Limit),
8920 ENTRY(cpum.GstCtx.fs.Attr),
8921 ENTRY(cpum.GstCtx.gs.Sel),
8922 ENTRY(cpum.GstCtx.gs.u64Base),
8923 ENTRY(cpum.GstCtx.gs.u32Limit),
8924 ENTRY(cpum.GstCtx.gs.Attr),
8925 ENTRY(cpum.GstCtx.rip),
8926 ENTRY(cpum.GstCtx.eflags),
8927 ENTRY(cpum.GstCtx.uRipInhibitInt),
8928 ENTRY(cpum.GstCtx.cr0),
8929 ENTRY(cpum.GstCtx.cr4),
8930 ENTRY(cpum.GstCtx.aXcr[0]),
8931 ENTRY(cpum.GstCtx.aXcr[1]),
8932#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8933 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8934 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8935 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8936 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8937 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8938 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8939 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8940 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8941 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8942 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8943 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8944 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8945 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8946 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8947 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8948 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8949 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8950 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8951 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8952 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8953 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8954 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8955 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8956 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8957 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8958 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8959 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8960 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8961 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8962 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8963 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8964 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8965#endif
8966#undef ENTRY
8967 };
8968#ifdef VBOX_STRICT
8969 static bool s_fOrderChecked = false;
8970 if (!s_fOrderChecked)
8971 {
8972 s_fOrderChecked = true;
8973 uint32_t offPrev = s_aMembers[0].off;
8974 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8975 {
8976 Assert(s_aMembers[i].off > offPrev);
8977 offPrev = s_aMembers[i].off;
8978 }
8979 }
8980#endif
8981
8982 /*
8983 * Binary lookup.
8984 */
8985 unsigned iStart = 0;
8986 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8987 for (;;)
8988 {
8989 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8990 uint32_t const offCur = s_aMembers[iCur].off;
8991 if (off < offCur)
8992 {
8993 if (iCur != iStart)
8994 iEnd = iCur;
8995 else
8996 break;
8997 }
8998 else if (off > offCur)
8999 {
9000 if (iCur + 1 < iEnd)
9001 iStart = iCur + 1;
9002 else
9003 break;
9004 }
9005 else
9006 return s_aMembers[iCur].pszName;
9007 }
9008#ifdef VBOX_WITH_STATISTICS
9009 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9010 return "iem.s.acThreadedFuncStats[iFn]";
9011#endif
9012 return NULL;
9013}
9014
9015
9016DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9017{
9018 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9019#if defined(RT_ARCH_AMD64)
9020 static const char * const a_apszMarkers[] =
9021 {
9022 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9023 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9024 };
9025#endif
9026
9027 char szDisBuf[512];
9028 DISSTATE Dis;
9029 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9030 uint32_t const cNative = pTb->Native.cInstructions;
9031 uint32_t offNative = 0;
9032#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9033 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9034#endif
9035 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9036 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9037 : DISCPUMODE_64BIT;
9038#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9039 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9040#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9041 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9042#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9043# error "Port me"
9044#else
9045 csh hDisasm = ~(size_t)0;
9046# if defined(RT_ARCH_AMD64)
9047 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9048# elif defined(RT_ARCH_ARM64)
9049 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9050# else
9051# error "Port me"
9052# endif
9053 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9054
9055 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9056 //Assert(rcCs == CS_ERR_OK);
9057#endif
9058
9059 /*
9060 * Print TB info.
9061 */
9062 pHlp->pfnPrintf(pHlp,
9063 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9064 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9065 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9066 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9067#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9068 if (pDbgInfo && pDbgInfo->cEntries > 1)
9069 {
9070 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9071
9072 /*
 9073 * This disassembly is driven by the debug info, which follows the native
 9074 * code and indicates where the next guest instruction starts, where the
 9075 * labels are, and similar things.
9076 */
9077 uint32_t idxThreadedCall = 0;
9078 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9079 uint8_t idxRange = UINT8_MAX;
9080 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9081 uint32_t offRange = 0;
9082 uint32_t offOpcodes = 0;
9083 uint32_t const cbOpcodes = pTb->cbOpcodes;
9084 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9085 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9086 uint32_t iDbgEntry = 1;
9087 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9088
9089 while (offNative < cNative)
9090 {
9091 /* If we're at or have passed the point where the next chunk of debug
9092 info starts, process it. */
9093 if (offDbgNativeNext <= offNative)
9094 {
9095 offDbgNativeNext = UINT32_MAX;
9096 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9097 {
9098 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9099 {
9100 case kIemTbDbgEntryType_GuestInstruction:
9101 {
9102 /* Did the exec flag change? */
9103 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9104 {
9105 pHlp->pfnPrintf(pHlp,
9106 " fExec change %#08x -> %#08x %s\n",
9107 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9108 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9109 szDisBuf, sizeof(szDisBuf)));
9110 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9111 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9112 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9113 : DISCPUMODE_64BIT;
9114 }
9115
 9116 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9117 where the compilation was aborted before the opcode was recorded and the actual
9118 instruction was translated to a threaded call. This may happen when we run out
9119 of ranges, or when some complicated interrupts/FFs are found to be pending or
9120 similar. So, we just deal with it here rather than in the compiler code as it
9121 is a lot simpler to do here. */
9122 if ( idxRange == UINT8_MAX
9123 || idxRange >= cRanges
9124 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9125 {
9126 idxRange += 1;
9127 if (idxRange < cRanges)
9128 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9129 else
9130 continue;
9131 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9132 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9133 + (pTb->aRanges[idxRange].idxPhysPage == 0
9134 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9135 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9136 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9137 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9138 pTb->aRanges[idxRange].idxPhysPage);
9139 GCPhysPc += offRange;
9140 }
9141
9142 /* Disassemble the instruction. */
9143 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9144 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9145 uint32_t cbInstr = 1;
9146 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9147 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9148 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9149 if (RT_SUCCESS(rc))
9150 {
9151 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9152 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9153 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9154 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9155
9156 static unsigned const s_offMarker = 55;
9157 static char const s_szMarker[] = " ; <--- guest";
9158 if (cch < s_offMarker)
9159 {
9160 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9161 cch = s_offMarker;
9162 }
9163 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9164 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9165
9166 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9167 }
9168 else
9169 {
9170 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9171 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9172 cbInstr = 1;
9173 }
9174 GCPhysPc += cbInstr;
9175 offOpcodes += cbInstr;
9176 offRange += cbInstr;
9177 continue;
9178 }
9179
9180 case kIemTbDbgEntryType_ThreadedCall:
9181 pHlp->pfnPrintf(pHlp,
9182 " Call #%u to %s (%u args) - %s\n",
9183 idxThreadedCall,
9184 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9185 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9186 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9187 idxThreadedCall++;
9188 continue;
9189
9190 case kIemTbDbgEntryType_GuestRegShadowing:
9191 {
9192 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9193 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9194 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9195 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9196 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9197 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9198 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
9199 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9200 else
9201 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9202 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9203 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9204 continue;
9205 }
9206
9207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9208 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9209 {
9210 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9211 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9212 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9213 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9214 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9215 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9216 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9217 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9218 else
9219 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9220 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9221 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9222 continue;
9223 }
9224#endif
9225
9226 case kIemTbDbgEntryType_Label:
9227 {
9228 const char *pszName = "what_the_fudge";
9229 const char *pszComment = "";
9230 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9231 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9232 {
9233 case kIemNativeLabelType_Return: pszName = "Return"; break;
9234 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9235 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9236 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9237 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9238 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9239 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9240 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9241 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9242 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9243 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9244 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9245 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9246 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9247 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9248 case kIemNativeLabelType_If:
9249 pszName = "If";
9250 fNumbered = true;
9251 break;
9252 case kIemNativeLabelType_Else:
9253 pszName = "Else";
9254 fNumbered = true;
9255 pszComment = " ; regs state restored pre-if-block";
9256 break;
9257 case kIemNativeLabelType_Endif:
9258 pszName = "Endif";
9259 fNumbered = true;
9260 break;
9261 case kIemNativeLabelType_CheckIrq:
9262 pszName = "CheckIrq_CheckVM";
9263 fNumbered = true;
9264 break;
9265 case kIemNativeLabelType_TlbLookup:
9266 pszName = "TlbLookup";
9267 fNumbered = true;
9268 break;
9269 case kIemNativeLabelType_TlbMiss:
9270 pszName = "TlbMiss";
9271 fNumbered = true;
9272 break;
9273 case kIemNativeLabelType_TlbDone:
9274 pszName = "TlbDone";
9275 fNumbered = true;
9276 break;
9277 case kIemNativeLabelType_Invalid:
9278 case kIemNativeLabelType_End:
9279 break;
9280 }
9281 if (fNumbered)
9282 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9283 else
9284 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9285 continue;
9286 }
9287
9288 case kIemTbDbgEntryType_NativeOffset:
9289 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9290 Assert(offDbgNativeNext > offNative);
9291 break;
9292
9293#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9294 case kIemTbDbgEntryType_DelayedPcUpdate:
9295 pHlp->pfnPrintf(pHlp,
9296 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9297 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9298 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9299 continue;
9300#endif
9301
9302 default:
9303 AssertFailed();
9304 }
9305 iDbgEntry++;
9306 break;
9307 }
9308 }
9309
9310 /*
9311 * Disassemble the next native instruction.
9312 */
9313 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9314# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9315 uint32_t cbInstr = sizeof(paNative[0]);
9316 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9317 if (RT_SUCCESS(rc))
9318 {
9319# if defined(RT_ARCH_AMD64)
9320 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9321 {
9322 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
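                    /* uInfo layout (see the iemNativeEmitMarker call in iemNativeRecompile):
                       bits 0..14 = call index, bit 15 = set when recompiled, high word = the
                       threaded function number; small values (optionally with bit 31 set)
                       identify the fixed a_apszMarkers entries instead. */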
9323 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9324 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9325 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9326 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9327 uInfo & 0x8000 ? "recompiled" : "todo");
9328 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9329 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9330 else
9331 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9332 }
9333 else
9334# endif
9335 {
9336 const char *pszAnnotation = NULL;
9337# ifdef RT_ARCH_AMD64
9338 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9339 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9340 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9341 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9342 PCDISOPPARAM pMemOp;
9343 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9344 pMemOp = &Dis.Param1;
9345 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9346 pMemOp = &Dis.Param2;
9347 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9348 pMemOp = &Dis.Param3;
9349 else
9350 pMemOp = NULL;
9351 if ( pMemOp
9352 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9353 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9354 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9355 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9356
9357#elif defined(RT_ARCH_ARM64)
9358 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9359 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9360 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9361# else
9362# error "Port me"
9363# endif
9364 if (pszAnnotation)
9365 {
9366 static unsigned const s_offAnnotation = 55;
9367 size_t const cchAnnotation = strlen(pszAnnotation);
9368 size_t cchDis = strlen(szDisBuf);
9369 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9370 {
9371 if (cchDis < s_offAnnotation)
9372 {
9373 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9374 cchDis = s_offAnnotation;
9375 }
9376 szDisBuf[cchDis++] = ' ';
9377 szDisBuf[cchDis++] = ';';
9378 szDisBuf[cchDis++] = ' ';
9379 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9380 }
9381 }
9382 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9383 }
9384 }
9385 else
9386 {
9387# if defined(RT_ARCH_AMD64)
9388 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9389 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9390# elif defined(RT_ARCH_ARM64)
9391 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9392# else
9393# error "Port me"
9394# endif
9395 cbInstr = sizeof(paNative[0]);
9396 }
9397 offNative += cbInstr / sizeof(paNative[0]);
9398
9399# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9400 cs_insn *pInstr;
9401 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9402 (uintptr_t)pNativeCur, 1, &pInstr);
9403 if (cInstrs > 0)
9404 {
9405 Assert(cInstrs == 1);
9406 const char *pszAnnotation = NULL;
9407# if defined(RT_ARCH_ARM64)
9408 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9409 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9410 {
 9411 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9412 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9413 char *psz = strchr(pInstr->op_str, '[');
9414 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9415 {
9416 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9417 int32_t off = -1;
9418 psz += 4;
9419 if (*psz == ']')
9420 off = 0;
9421 else if (*psz == ',')
9422 {
9423 psz = RTStrStripL(psz + 1);
9424 if (*psz == '#')
9425 off = RTStrToInt32(&psz[1]);
9426 /** @todo deal with index registers and LSL as well... */
9427 }
9428 if (off >= 0)
9429 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9430 }
9431 }
9432# endif
9433
9434 size_t const cchOp = strlen(pInstr->op_str);
9435# if defined(RT_ARCH_AMD64)
9436 if (pszAnnotation)
9437 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9438 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9439 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9440 else
9441 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9442 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9443
9444# else
9445 if (pszAnnotation)
9446 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9447 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9448 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9449 else
9450 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9451 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9452# endif
9453 offNative += pInstr->size / sizeof(*pNativeCur);
9454 cs_free(pInstr, cInstrs);
9455 }
9456 else
9457 {
9458# if defined(RT_ARCH_AMD64)
9459 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
 9460 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9461# else
9462 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9463# endif
9464 offNative++;
9465 }
9466# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9467 }
9468 }
9469 else
9470#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9471 {
9472 /*
9473 * No debug info, just disassemble the x86 code and then the native code.
9474 *
9475 * First the guest code:
9476 */
9477 for (unsigned i = 0; i < pTb->cRanges; i++)
9478 {
9479 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9480 + (pTb->aRanges[i].idxPhysPage == 0
9481 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9482 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9483 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9484 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9485 unsigned off = pTb->aRanges[i].offOpcodes;
9486 /** @todo this ain't working when crossing pages! */
9487 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9488 while (off < cbOpcodes)
9489 {
9490 uint32_t cbInstr = 1;
9491 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9492 &pTb->pabOpcodes[off], cbOpcodes - off,
9493 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9494 if (RT_SUCCESS(rc))
9495 {
9496 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9497 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9498 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9499 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9500 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9501 GCPhysPc += cbInstr;
9502 off += cbInstr;
9503 }
9504 else
9505 {
9506 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9507 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9508 break;
9509 }
9510 }
9511 }
9512
9513 /*
9514 * Then the native code:
9515 */
9516 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9517 while (offNative < cNative)
9518 {
9519 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9520# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9521 uint32_t cbInstr = sizeof(paNative[0]);
9522 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9523 if (RT_SUCCESS(rc))
9524 {
9525# if defined(RT_ARCH_AMD64)
9526 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9527 {
9528 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9529 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9530 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9531 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9532 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9533 uInfo & 0x8000 ? "recompiled" : "todo");
9534 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9535 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9536 else
9537 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9538 }
9539 else
9540# endif
9541 {
9542# ifdef RT_ARCH_AMD64
9543 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9544 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9545 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9546 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9547# elif defined(RT_ARCH_ARM64)
9548 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9549 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9550 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9551# else
9552# error "Port me"
9553# endif
9554 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9555 }
9556 }
9557 else
9558 {
9559# if defined(RT_ARCH_AMD64)
9560 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9561 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9562# else
9563 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9564# endif
9565 cbInstr = sizeof(paNative[0]);
9566 }
9567 offNative += cbInstr / sizeof(paNative[0]);
9568
9569# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9570 cs_insn *pInstr;
9571 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9572 (uintptr_t)pNativeCur, 1, &pInstr);
9573 if (cInstrs > 0)
9574 {
9575 Assert(cInstrs == 1);
9576# if defined(RT_ARCH_AMD64)
9577 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9578 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9579# else
9580 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9581 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9582# endif
9583 offNative += pInstr->size / sizeof(*pNativeCur);
9584 cs_free(pInstr, cInstrs);
9585 }
9586 else
9587 {
9588# if defined(RT_ARCH_AMD64)
9589 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
 9590 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9591# else
9592 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9593# endif
9594 offNative++;
9595 }
9596# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9597 }
9598 }
9599
9600#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9601 /* Cleanup. */
9602 cs_close(&hDisasm);
9603#endif
9604}
9605
9606
9607/**
9608 * Recompiles the given threaded TB into a native one.
9609 *
9610 * In case of failure the translation block will be returned as-is.
9611 *
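 * Typical call pattern (a hedged sketch; the actual caller lives in the
 * threaded recompiler code):
 * @code
 *      pTb = iemNativeRecompile(pVCpu, pTb); // the original TB is returned on failure
 * @endcode
 *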
9612 * @returns pTb.
9613 * @param pVCpu The cross context virtual CPU structure of the calling
9614 * thread.
9615 * @param pTb The threaded translation to recompile to native.
9616 */
9617DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9618{
9619 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9620
9621 /*
 9622 * The first time through, we allocate the recompiler state; on subsequent
 9623 * calls we just need to reset it before using it again.
9624 */
9625 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9626 if (RT_LIKELY(pReNative))
9627 iemNativeReInit(pReNative, pTb);
9628 else
9629 {
9630 pReNative = iemNativeInit(pVCpu, pTb);
9631 AssertReturn(pReNative, pTb);
9632 }
9633
9634#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9635 /*
 9636 * First do liveness analysis. This is done backwards, from the last call to the first, so each entry reflects what the code after it still needs.
9637 */
9638 {
9639 uint32_t idxCall = pTb->Thrd.cCalls;
9640 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9641 { /* likely */ }
9642 else
9643 {
9644 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9645 while (idxCall > cAlloc)
9646 cAlloc *= 2;
9647 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9648 AssertReturn(pvNew, pTb);
9649 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9650 pReNative->cLivenessEntriesAlloc = cAlloc;
9651 }
9652 AssertReturn(idxCall > 0, pTb);
9653 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9654
9655 /* The initial (final) entry. */
9656 idxCall--;
9657 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9658
 9659 /* Loop backwards through the calls and fill in the other entries. */
9660 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9661 while (idxCall > 0)
9662 {
9663 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9664 if (pfnLiveness)
9665 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9666 else
9667 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9668 pCallEntry--;
9669 idxCall--;
9670 }
9671
9672# ifdef VBOX_WITH_STATISTICS
 9673 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
 9674 to 'clobbered' rather than 'input'. */
9675 /** @todo */
9676# endif
9677 }
9678#endif
9679
9680 /*
9681 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9682 * for aborting if an error happens.
9683 */
9684 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9685#ifdef LOG_ENABLED
9686 uint32_t const cCallsOrg = cCallsLeft;
9687#endif
9688 uint32_t off = 0;
9689 int rc = VINF_SUCCESS;
9690 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9691 {
9692 /*
9693 * Emit prolog code (fixed).
9694 */
9695 off = iemNativeEmitProlog(pReNative, off);
9696
9697 /*
9698 * Convert the calls to native code.
9699 */
9700#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9701 int32_t iGstInstr = -1;
9702#endif
9703#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9704 uint32_t cThreadedCalls = 0;
9705 uint32_t cRecompiledCalls = 0;
9706#endif
9707#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9708 uint32_t idxCurCall = 0;
9709#endif
9710 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9711 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9712 while (cCallsLeft-- > 0)
9713 {
9714 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9715#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9716 pReNative->idxCurCall = idxCurCall;
9717#endif
9718
9719 /*
9720 * Debug info, assembly markup and statistics.
9721 */
9722#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9723 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9724 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9725#endif
9726#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9727 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9728 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9729 {
9730 if (iGstInstr < (int32_t)pTb->cInstructions)
9731 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9732 else
9733 Assert(iGstInstr == pTb->cInstructions);
9734 iGstInstr = pCallEntry->idxInstr;
9735 }
9736 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9737#endif
9738#if defined(VBOX_STRICT)
9739 off = iemNativeEmitMarker(pReNative, off,
9740 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9741#endif
9742#if defined(VBOX_STRICT)
9743 iemNativeRegAssertSanity(pReNative);
9744#endif
9745#ifdef VBOX_WITH_STATISTICS
9746 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9747#endif
9748
9749 /*
9750 * Actual work.
9751 */
9752 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9753 pfnRecom ? "(recompiled)" : "(todo)"));
9754 if (pfnRecom) /** @todo stats on this. */
9755 {
9756 off = pfnRecom(pReNative, off, pCallEntry);
9757 STAM_REL_STATS({cRecompiledCalls++;});
9758 }
9759 else
9760 {
9761 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9762 STAM_REL_STATS({cThreadedCalls++;});
9763 }
9764 Assert(off <= pReNative->cInstrBufAlloc);
9765 Assert(pReNative->cCondDepth == 0);
9766
9767#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9768 if (LogIs2Enabled())
9769 {
9770 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9771# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9772 static const char s_achState[] = "CUXI";
9773# else
9774 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9775# endif
9776
9777 char szGpr[17];
9778 for (unsigned i = 0; i < 16; i++)
9779 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9780 szGpr[16] = '\0';
9781
9782 char szSegBase[X86_SREG_COUNT + 1];
9783 char szSegLimit[X86_SREG_COUNT + 1];
9784 char szSegAttrib[X86_SREG_COUNT + 1];
9785 char szSegSel[X86_SREG_COUNT + 1];
9786 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9787 {
9788 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9789 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9790 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9791 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9792 }
9793 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9794 = szSegSel[X86_SREG_COUNT] = '\0';
9795
9796 char szEFlags[8];
9797 for (unsigned i = 0; i < 7; i++)
9798 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9799 szEFlags[7] = '\0';
9800
9801 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9802 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9803 }
9804#endif
9805
9806 /*
9807 * Advance.
9808 */
9809 pCallEntry++;
9810#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9811 idxCurCall++;
9812#endif
9813 }
9814
9815 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9816 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9817 if (!cThreadedCalls)
9818 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9819
9820 /*
9821 * Emit the epilog code.
9822 */
9823 uint32_t idxReturnLabel;
9824 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9825
9826 /*
9827 * Generate special jump labels.
9828 */
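        /* Only emit the ReturnBreak / ReturnWithFlags tails if something actually
           requested them; bmLabelTypes tracks which label types were allocated
           while recompiling the calls above. */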
9829 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9830 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9831 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9832 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9833
9834 /*
9835 * Generate simple TB tail labels that just call a helper with a pVCpu
9836 * arg and either return or longjmp/throw a non-zero status.
9837 *
9838 * The array entries must be ordered by enmLabel value so we can index
9839 * using fTailLabels bit numbers.
9840 */
9841 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9842 static struct
9843 {
9844 IEMNATIVELABELTYPE enmLabel;
9845 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9846 } const g_aSimpleTailLabels[] =
9847 {
9848 { kIemNativeLabelType_Invalid, NULL },
9849 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9850 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9851 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9852 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9853 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9854 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9855 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9856 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9857 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9858 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9859 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9860 };
9861 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9862 AssertCompile(kIemNativeLabelType_Invalid == 0);
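        /* The '- 2U' clears bit 0 (kIemNativeLabelType_Invalid) as well, so the
           mask covers exactly bits 1 thru kIemNativeLabelType_LastSimple. */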
9863 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9864 if (fTailLabels)
9865 {
9866 do
9867 {
9868 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9869 fTailLabels &= ~RT_BIT_64(enmLabel);
9870 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9871
9872 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9873 Assert(idxLabel != UINT32_MAX);
9874 if (idxLabel != UINT32_MAX)
9875 {
9876 iemNativeLabelDefine(pReNative, idxLabel, off);
9877
9878 /* int pfnCallback(PVMCPUCC pVCpu) */
9879 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9880 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9881
9882 /* jump back to the return sequence. */
9883 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9884 }
9885
9886 } while (fTailLabels);
9887 }
9888 }
9889 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9890 {
9891 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9892 return pTb;
9893 }
9894 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9895 Assert(off <= pReNative->cInstrBufAlloc);
9896
9897 /*
9898 * Make sure all labels have been defined.
9899 */
9900 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9901#ifdef VBOX_STRICT
9902 uint32_t const cLabels = pReNative->cLabels;
9903 for (uint32_t i = 0; i < cLabels; i++)
9904 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9905#endif
9906
9907 /*
9908 * Allocate executable memory, copy over the code we've generated.
9909 */
9910 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9911 if (pTbAllocator->pDelayedFreeHead)
9912 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9913
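    /* Note: off counts IEMNATIVEINSTR units, hence the multiplication by
       sizeof(IEMNATIVEINSTR) to get the byte size of the final code block
       (presumably one byte per unit on AMD64 and four bytes on ARM64). */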
9914 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9915 AssertReturn(paFinalInstrBuf, pTb);
9916 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9917
9918 /*
9919 * Apply fixups.
9920 */
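    /* Each fixup records the position of a previously emitted branch/jump whose
       target label offset wasn't known at emit time; now that all labels are
       defined, the final displacement is patched directly into the copied code. */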
9921 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9922 uint32_t const cFixups = pReNative->cFixups;
9923 for (uint32_t i = 0; i < cFixups; i++)
9924 {
9925 Assert(paFixups[i].off < off);
9926 Assert(paFixups[i].idxLabel < cLabels);
9927 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9928 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9929 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9930 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9931 switch (paFixups[i].enmType)
9932 {
9933#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
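            /* 32-bit signed displacement as used by JMP/Jcc/CALL rel32; the
               offAddend is expected to compensate for the displacement being
               relative to the end of the instruction rather than to the fixup
               position itself. */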
9934 case kIemNativeFixupType_Rel32:
9935 Assert(paFixups[i].off + 4 <= off);
9936 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9937 continue;
9938
9939#elif defined(RT_ARCH_ARM64)
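            /* B/BL style branches: 26-bit signed instruction-count displacement
               stored in bits 0..25. */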
9940 case kIemNativeFixupType_RelImm26At0:
9941 {
9942 Assert(paFixups[i].off < off);
9943 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9944 Assert(offDisp >= -262144 && offDisp < 262144);
9945 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9946 continue;
9947 }
9948
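            /* Conditional branches (B.cond, CBZ/CBNZ) and LDR literal: 19-bit
               signed instruction-count displacement stored in bits 5..23. */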
9949 case kIemNativeFixupType_RelImm19At5:
9950 {
9951 Assert(paFixups[i].off < off);
9952 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9953 Assert(offDisp >= -262144 && offDisp < 262144);
9954 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9955 continue;
9956 }
9957
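            /* TBZ/TBNZ: 14-bit signed instruction-count displacement stored in
               bits 5..18, matching the +/-8192 assertion below. */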
9958 case kIemNativeFixupType_RelImm14At5:
9959 {
9960 Assert(paFixups[i].off < off);
9961 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9962 Assert(offDisp >= -8192 && offDisp < 8192);
9963 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9964 continue;
9965 }
9966
9967#endif
9968 case kIemNativeFixupType_Invalid:
9969 case kIemNativeFixupType_End:
9970 break;
9971 }
9972 AssertFailed();
9973 }
9974
9975 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9976 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9977
9978 /*
9979 * Convert the translation block.
9980 */
9981 RTMemFree(pTb->Thrd.paCalls);
9982 pTb->Native.paInstructions = paFinalInstrBuf;
9983 pTb->Native.cInstructions = off;
9984 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9985#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9986 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9987 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9988#endif
9989
9990 Assert(pTbAllocator->cThreadedTbs > 0);
9991 pTbAllocator->cThreadedTbs -= 1;
9992 pTbAllocator->cNativeTbs += 1;
9993 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9994
9995#ifdef LOG_ENABLED
9996 /*
9997 * Disassemble to the log if enabled.
9998 */
9999 if (LogIs3Enabled())
10000 {
10001 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10002 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10003# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10004 RTLogFlush(NULL);
10005# endif
10006 }
10007#endif
10008 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10009
10010 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10011 return pTb;
10012}
10013