VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103917

Last change on this file since 103917 was 103916, checked in by vboxsync, 9 months ago

VMM/IEM: Convert iemMemStoreDataU256NoAc()/iemMemStoreDataU256NoAcJmp() to use the memory RW template and implement native emitters for IEM_MC_FETCH_MEM_U256_NO_AC()/IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 422.5 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103916 2024-03-19 13:11:09Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal
146 * data in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
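/* Illustrative arithmetic (sketch only, mirroring iemExecMemAllocatorAllocInChunk
 * below): a 200 byte request needs (200 + 127) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT
 * = 2 allocation units, i.e. two consecutive clear bits in the chunk's
 * allocation bitmap. */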
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity/laziness, they are allocated as one continuous
339 * block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
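#if 0 /* Illustrative sketch of the intended calling sequence; pbGeneratedCode and
         cbCode are placeholder names, the real callers live elsewhere in the
         recompiler. */
    void *pvCode = iemExecMemAllocatorAlloc(pVCpu, cbCode);    /* pages come back read+write on darwin */
    memcpy(pvCode, pbGeneratedCode, cbCode);                   /* write/copy the recompiled code */
    iemExecMemAllocatorReadyForUse(pVCpu, pvCode, cbCode);     /* flip back to read+exec and flush the icache */
#endif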
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here, both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values would be
696 * a pain to write initializers for. On the positive side, we're
697 * impervious to changes in the stack variable area and can deal
698 * with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 16 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
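/* Examples (sketch): 8 is encoded as 0x08, 100 as 0xe4 0x00, -8 as 0x78 and
 * -128 as 0x80 0x7f, matching standard signed LEB128 within the limited
 * +/-0x2000 range supported here. */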
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
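/* Example (sketch): 300 is encoded as 0xac 0x02 - seven bits at a time, least
 * significant group first, with the high bit set on all but the last byte. */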
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
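/* Example (sketch): with the data alignment factor of -8 established in the CIE
 * below, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) states that RBP was saved
 * at CFA + 2*-8, i.e. 16 bytes below the canonical frame address. */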
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
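/* Example (sketch): APPEND_STR(".eh_frame") copies the 10 byte string (incl. the
 * terminator) into szzStrTab and advances offStrTab by 10; the sh_name fields
 * assigned below are simply offsets into this string table. */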
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
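/* Worked example (sketch): a 256 byte request is adjusted to
 * RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes, so block header (32) plus user
 * area (288) is 320 bytes, a whole number of 64 byte lines, which keeps the
 * next block's user area 64 byte aligned as well. */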
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
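/* Example (sketch, hypothetical numbers): with cbMax = 64M and cbChunk = 0, the
 * code above picks cbChunk = 64M / 4 = 16M (already a power of two), leaves
 * cbMax at 64M and arrives at cMaxChunks = 4. */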
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadeFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695 /* We set fSafeToFree to false because we're being called in the context
1696 of a TB callback function, which for native TBs means we cannot release
1697 the executable memory until we've returned our way back to iemTbExec, as
1698 that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
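
/*
 * The cast chains in the helpers above first sign-extend the fetched byte to
 * the requested width and then zero-extend the result to 64 bits.  A minimal
 * sketch of the resulting values for a fetched byte of 0x80 (illustrative
 * only, not part of the original code):
 */
#if 0
AssertCompile((uint64_t)(uint16_t)(int16_t)(int8_t)UINT8_C(0x80) == UINT64_C(0x000000000000FF80));
AssertCompile((uint64_t)(uint32_t)(int32_t)(int8_t)UINT8_C(0x80) == UINT64_C(0x00000000FFFFFF80));
AssertCompile((uint64_t)(int64_t)(int8_t)UINT8_C(0x80)           == UINT64_C(0xFFFFFFFFFFFFFF80));
#endif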
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1874/**
1875 * Used by TB code to load 128-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1880 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1881#else
1882 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1883#endif
1884}
1885
1886
1887/**
1888 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
1889 */
1890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1891{
1892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1893 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1894#else
1895 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1896#endif
1897}
1898
1899
1900/**
1901 * Used by TB code to load 256-bit data w/ segmentation.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1906 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1907#else
1908 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1909#endif
1910}
1911#endif
1912
1913
1914/**
1915 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1916 */
1917IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1918{
1919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1920 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1921#else
1922 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1923#endif
1924}
1925
1926
1927/**
1928 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1929 */
1930IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1931{
1932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1933 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1934#else
1935 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1936#endif
1937}
1938
1939
1940/**
1941 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1946 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1947#else
1948 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1949#endif
1950}
1951
1952
1953/**
1954 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1955 */
1956IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1957{
1958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1959 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1960#else
1961 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1962#endif
1963}
1964
1965
1966
1967/**
1968 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1969 */
1970IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1971{
1972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1973 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1974#else
1975 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1976#endif
1977}
1978
1979
1980/**
1981 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1982 */
1983IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1984{
1985#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1986 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1987#else
1988 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1989#endif
1990}
1991
1992
1993/**
1994 * Used by TB code to store a 32-bit selector value onto a generic stack.
1995 *
1996 * Intel CPUs don't write a whole dword (only the low word), thus the special function.
1997 */
1998IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1999{
2000#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2001 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2002#else
2003 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2004#endif
2005}
2006
2007
2008/**
2009 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2010 */
2011IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2012{
2013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2014 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2015#else
2016 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2017#endif
2018}
2019
2020
2021/**
2022 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2025{
2026#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2027 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2028#else
2029 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2030#endif
2031}
2032
2033
2034/**
2035 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2036 */
2037IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2038{
2039#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2040 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2041#else
2042 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2043#endif
2044}
2045
2046
2047/**
2048 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2049 */
2050IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2051{
2052#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2053 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2054#else
2055 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2056#endif
2057}
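
/*
 * The stack helpers above only perform the guest memory access itself; going
 * by their signatures, the effective stack address and the RSP/SP adjustment
 * are produced by the recompiled code that calls them.  A minimal sketch of
 * how a 64-bit pop could be composed from such a helper (hypothetical caller
 * for illustration, not the code the recompiler actually emits):
 */
#if 0
static uint64_t exampleComposePopU64(PVMCPUCC pVCpu)
{
    RTGCPTR const  GCPtrTop = pVCpu->cpum.GstCtx.rsp;          /* flat 64-bit stack */
    uint64_t const uValue   = iemNativeHlpStackFetchU64(pVCpu, GCPtrTop);
    pVCpu->cpum.GstCtx.rsp  = GCPtrTop + sizeof(uint64_t);     /* a pop moves RSP up */
    return uValue;
}
#endif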
2058
2059
2060
2061/*********************************************************************************************************************************
2062* Helpers: Flat memory fetches and stores. *
2063*********************************************************************************************************************************/
2064
2065/**
2066 * Used by TB code to load unsigned 8-bit data w/ flat address.
2067 * @note Zero extending the value to 64-bit to simplify assembly.
2068 */
2069IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2070{
2071#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2072 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2073#else
2074 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2075#endif
2076}
2077
2078
2079/**
2080 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2081 * to 16 bits.
2082 * @note Zero extending the value to 64-bit to simplify assembly.
2083 */
2084IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2085{
2086#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2087 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2088#else
2089 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2090#endif
2091}
2092
2093
2094/**
2095 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2096 * to 32 bits.
2097 * @note Zero extending the value to 64-bit to simplify assembly.
2098 */
2099IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2100{
2101#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2102 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2103#else
2104 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2105#endif
2106}
2107
2108
2109/**
2110 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2111 * to 64 bits.
2112 */
2113IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2114{
2115#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2116 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2117#else
2118 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2119#endif
2120}
2121
2122
2123/**
2124 * Used by TB code to load unsigned 16-bit data w/ flat address.
2125 * @note Zero extending the value to 64-bit to simplify assembly.
2126 */
2127IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2128{
2129#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2130 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2131#else
2132 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2133#endif
2134}
2135
2136
2137/**
2138 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2139 * to 32 bits.
2140 * @note Zero extending the value to 64-bit to simplify assembly.
2141 */
2142IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2143{
2144#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2145 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2146#else
2147 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2148#endif
2149}
2150
2151
2152/**
2153 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2154 * to 64 bits.
2155 * @note Zero extending the value to 64-bit to simplify assembly.
2156 */
2157IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2158{
2159#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2160 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2161#else
2162 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2163#endif
2164}
2165
2166
2167/**
2168 * Used by TB code to load unsigned 32-bit data w/ flat address.
2169 * @note Zero extending the value to 64-bit to simplify assembly.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2172{
2173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2174 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2175#else
2176 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2177#endif
2178}
2179
2180
2181/**
2182 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2183 * to 64 bits.
2184 * @note Zero extending the value to 64-bit to simplify assembly.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2189 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2190#else
2191 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to load unsigned 64-bit data w/ flat address.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2202 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2203#else
2204 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2205#endif
2206}
2207
2208
2209#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2210/**
2211 * Used by TB code to load unsigned 128-bit data w/ flat address.
2212 */
2213IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2214{
2215#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2216 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2217#else
2218 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2219#endif
2220}
2221
2222
2223/**
2224 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
2225 */
2226IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2227{
2228#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2229 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2230#else
2231 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2232#endif
2233}
2234
2235
2236/**
2237 * Used by TB code to load unsigned 256-bit data w/ flat address.
2238 */
2239IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2240{
2241#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2242 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2243#else
2244 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2245#endif
2246}
2247#endif
2248
2249
2250/**
2251 * Used by TB code to store unsigned 8-bit data w/ flat address.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2256 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2257#else
2258 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to store unsigned 16-bit data w/ flat address.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2269 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2270#else
2271 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2272#endif
2273}
2274
2275
2276/**
2277 * Used by TB code to store unsigned 32-bit data w/ flat address.
2278 */
2279IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2280{
2281#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2282 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2283#else
2284 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2285#endif
2286}
2287
2288
2289/**
2290 * Used by TB code to store unsigned 64-bit data w/ flat address.
2291 */
2292IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2293{
2294#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2295 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2296#else
2297 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2298#endif
2299}
2300
2301
2302
2303/**
2304 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2305 */
2306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2307{
2308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2309 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2310#else
2311 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2312#endif
2313}
2314
2315
2316/**
2317 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2318 */
2319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2320{
2321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2322 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2323#else
2324 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2325#endif
2326}
2327
2328
2329/**
2330 * Used by TB code to store a segment selector value onto a flat stack.
2331 *
2332 * Intel CPUs don't write a whole dword (only the low word), thus the special function.
2333 */
2334IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2335{
2336#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2337 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2338#else
2339 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2340#endif
2341}
2342
2343
2344/**
2345 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2346 */
2347IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2348{
2349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2350 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2351#else
2352 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2353#endif
2354}
2355
2356
2357/**
2358 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2361{
2362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2363 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2364#else
2365 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2366#endif
2367}
2368
2369
2370/**
2371 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2372 */
2373IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2374{
2375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2376 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2377#else
2378 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2379#endif
2380}
2381
2382
2383/**
2384 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2385 */
2386IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2387{
2388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2389 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2390#else
2391 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2392#endif
2393}
2394
2395
2396
2397/*********************************************************************************************************************************
2398* Helpers: Segmented memory mapping. *
2399*********************************************************************************************************************************/
2400
2401/**
2402 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2403 * segmentation.
2404 */
2405IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2406 RTGCPTR GCPtrMem, uint8_t iSegReg))
2407{
2408#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2409 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2410#else
2411 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2412#endif
2413}
2414
2415
2416/**
2417 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2418 */
2419IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2420 RTGCPTR GCPtrMem, uint8_t iSegReg))
2421{
2422#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2423 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2424#else
2425 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2426#endif
2427}
2428
2429
2430/**
2431 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2432 */
2433IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2434 RTGCPTR GCPtrMem, uint8_t iSegReg))
2435{
2436#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2437 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2438#else
2439 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2440#endif
2441}
2442
2443
2444/**
2445 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2446 */
2447IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2448 RTGCPTR GCPtrMem, uint8_t iSegReg))
2449{
2450#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2451 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2452#else
2453 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2454#endif
2455}
2456
2457
2458/**
2459 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2460 * segmentation.
2461 */
2462IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2463 RTGCPTR GCPtrMem, uint8_t iSegReg))
2464{
2465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2466 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#else
2468 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2469#endif
2470}
2471
2472
2473/**
2474 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2475 */
2476IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2477 RTGCPTR GCPtrMem, uint8_t iSegReg))
2478{
2479#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2480 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#else
2482 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2483#endif
2484}
2485
2486
2487/**
2488 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2489 */
2490IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2491 RTGCPTR GCPtrMem, uint8_t iSegReg))
2492{
2493#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2494 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2495#else
2496 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2497#endif
2498}
2499
2500
2501/**
2502 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2503 */
2504IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2505 RTGCPTR GCPtrMem, uint8_t iSegReg))
2506{
2507#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2508 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2509#else
2510 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2511#endif
2512}
2513
2514
2515/**
2516 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2517 * segmentation.
2518 */
2519IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2520 RTGCPTR GCPtrMem, uint8_t iSegReg))
2521{
2522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2523 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2524#else
2525 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2526#endif
2527}
2528
2529
2530/**
2531 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2532 */
2533IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2534 RTGCPTR GCPtrMem, uint8_t iSegReg))
2535{
2536#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2537 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#else
2539 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2540#endif
2541}
2542
2543
2544/**
2545 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2546 */
2547IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2548 RTGCPTR GCPtrMem, uint8_t iSegReg))
2549{
2550#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2551 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2552#else
2553 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2554#endif
2555}
2556
2557
2558/**
2559 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2560 */
2561IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2562 RTGCPTR GCPtrMem, uint8_t iSegReg))
2563{
2564#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2565 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2566#else
2567 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2568#endif
2569}
2570
2571
2572/**
2573 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2574 * segmentation.
2575 */
2576IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2577 RTGCPTR GCPtrMem, uint8_t iSegReg))
2578{
2579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2580 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2581#else
2582 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2583#endif
2584}
2585
2586
2587/**
2588 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2589 */
2590IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2591 RTGCPTR GCPtrMem, uint8_t iSegReg))
2592{
2593#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2594 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2595#else
2596 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2597#endif
2598}
2599
2600
2601/**
2602 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2603 */
2604IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2605 RTGCPTR GCPtrMem, uint8_t iSegReg))
2606{
2607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2608 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2609#else
2610 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2611#endif
2612}
2613
2614
2615/**
2616 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2617 */
2618IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2619 RTGCPTR GCPtrMem, uint8_t iSegReg))
2620{
2621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2622 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2623#else
2624 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2625#endif
2626}
2627
2628
2629/**
2630 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2631 */
2632IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2633 RTGCPTR GCPtrMem, uint8_t iSegReg))
2634{
2635#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2636 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2637#else
2638 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2639#endif
2640}
2641
2642
2643/**
2644 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2645 */
2646IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2647 RTGCPTR GCPtrMem, uint8_t iSegReg))
2648{
2649#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2650 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2651#else
2652 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2653#endif
2654}
2655
2656
2657/**
2658 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2659 * segmentation.
2660 */
2661IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2662 RTGCPTR GCPtrMem, uint8_t iSegReg))
2663{
2664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2665 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2666#else
2667 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2668#endif
2669}
2670
2671
2672/**
2673 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2674 */
2675IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2676 RTGCPTR GCPtrMem, uint8_t iSegReg))
2677{
2678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2679 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2680#else
2681 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2682#endif
2683}
2684
2685
2686/**
2687 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2688 */
2689IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2690 RTGCPTR GCPtrMem, uint8_t iSegReg))
2691{
2692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2693 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2694#else
2695 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2696#endif
2697}
2698
2699
2700/**
2701 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2702 */
2703IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2704 RTGCPTR GCPtrMem, uint8_t iSegReg))
2705{
2706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2707 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2708#else
2709 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2710#endif
2711}
2712
2713
2714/*********************************************************************************************************************************
2715* Helpers: Flat memory mapping. *
2716*********************************************************************************************************************************/
2717
2718/**
2719 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2720 * address.
2721 */
2722IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2726#else
2727 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2734 */
2735IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2736{
2737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2738 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2739#else
2740 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2741#endif
2742}
2743
2744
2745/**
2746 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2747 */
2748IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2749{
2750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2751 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2752#else
2753 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2754#endif
2755}
2756
2757
2758/**
2759 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2773 * address.
2774 */
2775IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2776{
2777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2778 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2779#else
2780 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2781#endif
2782}
2783
2784
2785/**
2786 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2787 */
2788IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2789{
2790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2791 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2792#else
2793 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2794#endif
2795}
2796
2797
2798/**
2799 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2800 */
2801IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2802{
2803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2804 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2805#else
2806 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2807#endif
2808}
2809
2810
2811/**
2812 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2813 */
2814IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2815{
2816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2817 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2818#else
2819 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2820#endif
2821}
2822
2823
2824/**
2825 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2826 * address.
2827 */
2828IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2829{
2830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2831 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2832#else
2833 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2834#endif
2835}
2836
2837
2838/**
2839 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2840 */
2841IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2842{
2843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2844 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2845#else
2846 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2847#endif
2848}
2849
2850
2851/**
2852 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2853 */
2854IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2855{
2856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2857 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2858#else
2859 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2860#endif
2861}
2862
2863
2864/**
2865 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2866 */
2867IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2868{
2869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2870 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2871#else
2872 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2873#endif
2874}
2875
2876
2877/**
2878 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2879 * address.
2880 */
2881IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2882{
2883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2884 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2885#else
2886 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2887#endif
2888}
2889
2890
2891/**
2892 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2893 */
2894IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2895{
2896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2897 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2898#else
2899 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2900#endif
2901}
2902
2903
2904/**
2905 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2906 */
2907IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2908{
2909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2910 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2911#else
2912 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2913#endif
2914}
2915
2916
2917/**
2918 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2919 */
2920IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2921{
2922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2923 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2924#else
2925 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2926#endif
2927}
2928
2929
2930/**
2931 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2932 */
2933IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2934{
2935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2936 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2937#else
2938 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2939#endif
2940}
2941
2942
2943/**
2944 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2945 */
2946IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2947{
2948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2949 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2950#else
2951 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2952#endif
2953}
2954
2955
2956/**
2957 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2958 * address.
2959 */
2960IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2961{
2962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2963 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2964#else
2965 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2966#endif
2967}
2968
2969
2970/**
2971 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2972 */
2973IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2974{
2975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2976 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2977#else
2978 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2979#endif
2980}
2981
2982
2983/**
2984 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2985 */
2986IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2987{
2988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2989 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2990#else
2991 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2992#endif
2993}
2994
2995
2996/**
2997 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2998 */
2999IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3000{
3001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3002 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3003#else
3004 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3005#endif
3006}
3007
3008
3009/*********************************************************************************************************************************
3010* Helpers: Commit, rollback & unmap *
3011*********************************************************************************************************************************/
3012
3013/**
3014 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3015 */
3016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3017{
3018 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3019}
3020
3021
3022/**
3023 * Used by TB code to commit and unmap a read-write memory mapping.
3024 */
3025IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3026{
3027 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3028}
3029
3030
3031/**
3032 * Used by TB code to commit and unmap a write-only memory mapping.
3033 */
3034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3035{
3036 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3037}
3038
3039
3040/**
3041 * Used by TB code to commit and unmap a read-only memory mapping.
3042 */
3043IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3044{
3045 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3046}
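
/*
 * A minimal sketch of how the mapping helpers pair with the commit-and-unmap
 * helpers above via the bUnmapInfo cookie (hypothetical caller for
 * illustration, not the code the recompiler actually emits):
 */
#if 0
static void exampleIncrementGuestU32(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32       = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
    *pu32 += 1;                                           /* modify the mapped guest dword */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);   /* commit the change and release the mapping */
}
#endif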
3047
3048
3049/**
3050 * Reinitializes the native recompiler state.
3051 *
3052 * Called before starting a new recompile job.
3053 */
3054static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3055{
3056 pReNative->cLabels = 0;
3057 pReNative->bmLabelTypes = 0;
3058 pReNative->cFixups = 0;
3059#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3060 pReNative->pDbgInfo->cEntries = 0;
3061#endif
3062 pReNative->pTbOrg = pTb;
3063 pReNative->cCondDepth = 0;
3064 pReNative->uCondSeqNo = 0;
3065 pReNative->uCheckIrqSeqNo = 0;
3066 pReNative->uTlbSeqNo = 0;
3067
3068#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3069 pReNative->Core.offPc = 0;
3070 pReNative->Core.cInstrPcUpdateSkipped = 0;
3071#endif
3072#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3073 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3074#endif
3075 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3076#if IEMNATIVE_HST_GREG_COUNT < 32
3077 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3078#endif
3079 ;
3080 pReNative->Core.bmHstRegsWithGstShadow = 0;
3081 pReNative->Core.bmGstRegShadows = 0;
3082 pReNative->Core.bmVars = 0;
3083 pReNative->Core.bmStack = 0;
3084 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3085 pReNative->Core.u64ArgVars = UINT64_MAX;
3086
3087 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3088 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3089 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3090 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3091 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3092 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3093 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3094 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3095 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3096 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3097 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3098 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3099 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3100 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3101 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3102 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3103 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3104
3105 /* Full host register reinit: */
3106 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3107 {
3108 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3109 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3110 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3111 }
3112
3113 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3114 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3115#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3116 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3117#endif
3118#ifdef IEMNATIVE_REG_FIXED_TMP0
3119 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3120#endif
3121#ifdef IEMNATIVE_REG_FIXED_TMP1
3122 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3123#endif
3124#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3125 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3126#endif
3127 );
3128 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3129 {
3130 fRegs &= ~RT_BIT_32(idxReg);
3131        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3132 }
3133
3134 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3135#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3136 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3137#endif
3138#ifdef IEMNATIVE_REG_FIXED_TMP0
3139 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3140#endif
3141#ifdef IEMNATIVE_REG_FIXED_TMP1
3142 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3143#endif
3144#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3145 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3146#endif
3147
3148#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3149# ifdef RT_ARCH_ARM64
3150 /*
3151     * Arm64 only has 32 128-bit registers. In order to support emulating 256-bit registers, we statically
3152     * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3153     * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register of each pair as fixed here
3154     * during init, and the register allocator assumes it will always be free when the lower one is picked.
3155 */
3156 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3157# else
3158 uint32_t const fFixedAdditional = 0;
3159# endif
3160
3161 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3162 | fFixedAdditional
3163# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3164 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3165# endif
3166 ;
3167 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3168 pReNative->Core.bmGstSimdRegShadows = 0;
3169 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3170 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3171
3172 /* Full host register reinit: */
3173 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3174 {
3175 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3176 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3177 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3178 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3179 }
3180
3181 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3182 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3183 {
3184 fRegs &= ~RT_BIT_32(idxReg);
3185 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3186 }
3187
3188#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3189 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3190#endif
3191
3192#endif
3193
3194 return pReNative;
3195}
3196
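/*
 * Editor's note (illustrative, not part of the original source): the bmHstRegs
 * initialization above marks two groups of registers as permanently taken.
 * Assuming, for example, IEMNATIVE_HST_GREG_COUNT is 16, the term
 * ~(RT_BIT(16) - 1U) sets bits 16..31 so that non-existent host register
 * indexes are never handed out, while IEMNATIVE_REG_FIXED_MASK covers the
 * registers reserved for pVCpu, the fixed temporaries and the like:
 *
 *     pReNative->Core.bmHstRegs == IEMNATIVE_REG_FIXED_MASK | UINT32_C(0xffff0000);  // with 16 host GPRs
 */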
3197
3198/**
3199 * Allocates and initializes the native recompiler state.
3200 *
3201 * This is called the first time an EMT wants to recompile something.
3202 *
3203 * @returns Pointer to the new recompiler state.
3204 * @param pVCpu The cross context virtual CPU structure of the calling
3205 * thread.
3206 * @param pTb The TB that's about to be recompiled.
3207 * @thread EMT(pVCpu)
3208 */
3209static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3210{
3211 VMCPU_ASSERT_EMT(pVCpu);
3212
3213 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3214 AssertReturn(pReNative, NULL);
3215
3216 /*
3217 * Try allocate all the buffers and stuff we need.
3218 */
3219 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3220 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3221 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3222#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3223 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3224#endif
3225 if (RT_LIKELY( pReNative->pInstrBuf
3226 && pReNative->paLabels
3227 && pReNative->paFixups)
3228#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3229 && pReNative->pDbgInfo
3230#endif
3231 )
3232 {
3233 /*
3234 * Set the buffer & array sizes on success.
3235 */
3236 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3237 pReNative->cLabelsAlloc = _8K;
3238 pReNative->cFixupsAlloc = _16K;
3239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3240 pReNative->cDbgInfoAlloc = _16K;
3241#endif
3242
3243 /* Other constant stuff: */
3244 pReNative->pVCpu = pVCpu;
3245
3246 /*
3247 * Done, just need to save it and reinit it.
3248 */
3249 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3250 return iemNativeReInit(pReNative, pTb);
3251 }
3252
3253 /*
3254 * Failed. Cleanup and return.
3255 */
3256 AssertFailed();
3257 RTMemFree(pReNative->pInstrBuf);
3258 RTMemFree(pReNative->paLabels);
3259 RTMemFree(pReNative->paFixups);
3260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3261 RTMemFree(pReNative->pDbgInfo);
3262#endif
3263 RTMemFree(pReNative);
3264 return NULL;
3265}
3266
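/*
 * Editor's sketch of how the two functions above are expected to be combined
 * by the recompiler entry point; the real caller lives elsewhere in this file
 * and the locals here are illustrative only:
 *
 *     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *     if (RT_LIKELY(pReNative))
 *         pReNative = iemNativeReInit(pReNative, pTb);   // reuse the per-EMT state
 *     else
 *         pReNative = iemNativeInit(pVCpu, pTb);         // first recompilation on this EMT
 *     if (!pReNative)                                    // allocation failure: stick with the threaded TB
 *         ...bail out...
 */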
3267
3268/**
3269 * Creates a label
3270 *
3271 * If the label does not yet have a defined position,
3272 * call iemNativeLabelDefine() later to set it.
3273 *
3274 * @returns Label ID. Throws VBox status code on failure, so no need to check
3275 * the return value.
3276 * @param pReNative The native recompile state.
3277 * @param enmType The label type.
3278 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3279 * label is not yet defined (default).
3280 * @param   uData       Data associated with the label. Only applicable to
3281 *                      certain types of labels. Default is zero.
3282 */
3283DECL_HIDDEN_THROW(uint32_t)
3284iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3285 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3286{
3287 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3288
3289 /*
3290 * Locate existing label definition.
3291 *
3292 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3293 * and uData is zero.
3294 */
3295 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3296 uint32_t const cLabels = pReNative->cLabels;
3297 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3298#ifndef VBOX_STRICT
3299 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3300 && offWhere == UINT32_MAX
3301 && uData == 0
3302#endif
3303 )
3304 {
3305#ifndef VBOX_STRICT
3306 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3307 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3308 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3309 if (idxLabel < pReNative->cLabels)
3310 return idxLabel;
3311#else
3312 for (uint32_t i = 0; i < cLabels; i++)
3313 if ( paLabels[i].enmType == enmType
3314 && paLabels[i].uData == uData)
3315 {
3316 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3317 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3318 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3319 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3320 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3321 return i;
3322 }
3323 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3324 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3325#endif
3326 }
3327
3328 /*
3329 * Make sure we've got room for another label.
3330 */
3331 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3332 { /* likely */ }
3333 else
3334 {
3335 uint32_t cNew = pReNative->cLabelsAlloc;
3336 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3337 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3338 cNew *= 2;
3339        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3340 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3341 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3342 pReNative->paLabels = paLabels;
3343 pReNative->cLabelsAlloc = cNew;
3344 }
3345
3346 /*
3347 * Define a new label.
3348 */
3349 paLabels[cLabels].off = offWhere;
3350 paLabels[cLabels].enmType = enmType;
3351 paLabels[cLabels].uData = uData;
3352 pReNative->cLabels = cLabels + 1;
3353
3354 Assert((unsigned)enmType < 64);
3355 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3356
3357 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3358 {
3359 Assert(uData == 0);
3360 pReNative->aidxUniqueLabels[enmType] = cLabels;
3361 }
3362
3363 if (offWhere != UINT32_MAX)
3364 {
3365#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3366 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3367 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3368#endif
3369 }
3370 return cLabels;
3371}
3372
3373
3374/**
3375 * Defines the location of an existing label.
3376 *
3377 * @param pReNative The native recompile state.
3378 * @param idxLabel The label to define.
3379 * @param offWhere The position.
3380 */
3381DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3382{
3383 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3384 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3385 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3386 pLabel->off = offWhere;
3387#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3388 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3389 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3390#endif
3391}
3392
3393
3394/**
3395 * Looks up a label.
3396 *
3397 * @returns Label ID if found, UINT32_MAX if not.
3398 */
3399static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3400 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3401{
3402 Assert((unsigned)enmType < 64);
3403 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3404 {
3405 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3406 return pReNative->aidxUniqueLabels[enmType];
3407
3408 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3409 uint32_t const cLabels = pReNative->cLabels;
3410 for (uint32_t i = 0; i < cLabels; i++)
3411 if ( paLabels[i].enmType == enmType
3412 && paLabels[i].uData == uData
3413 && ( paLabels[i].off == offWhere
3414 || offWhere == UINT32_MAX
3415 || paLabels[i].off == UINT32_MAX))
3416 return i;
3417 }
3418 return UINT32_MAX;
3419}
3420
3421
3422/**
3423 * Adds a fixup.
3424 *
3425 * @throws VBox status code (int) on failure.
3426 * @param pReNative The native recompile state.
3427 * @param offWhere The instruction offset of the fixup location.
3428 * @param idxLabel The target label ID for the fixup.
3429 * @param enmType The fixup type.
3430 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3431 */
3432DECL_HIDDEN_THROW(void)
3433iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3434 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3435{
3436 Assert(idxLabel <= UINT16_MAX);
3437 Assert((unsigned)enmType <= UINT8_MAX);
3438#ifdef RT_ARCH_ARM64
3439 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3440 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3441 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3442#endif
3443
3444 /*
3445 * Make sure we've room.
3446 */
3447 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3448 uint32_t const cFixups = pReNative->cFixups;
3449 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3450 { /* likely */ }
3451 else
3452 {
3453 uint32_t cNew = pReNative->cFixupsAlloc;
3454 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3455 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3456 cNew *= 2;
3457 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3458 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3459 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3460 pReNative->paFixups = paFixups;
3461 pReNative->cFixupsAlloc = cNew;
3462 }
3463
3464 /*
3465 * Add the fixup.
3466 */
3467 paFixups[cFixups].off = offWhere;
3468 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3469 paFixups[cFixups].enmType = enmType;
3470 paFixups[cFixups].offAddend = offAddend;
3471 pReNative->cFixups = cFixups + 1;
3472}
3473
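/*
 * Editor's sketch (not part of the original source): a forward branch is the
 * typical consumer of the three helpers above.  The branch emitter and the
 * concrete label/fixup types are placeholders here; the helper calls match the
 * signatures defined above:
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);   // position left at UINT32_MAX
 *     iemNativeAddFixup(pReNative, offBranchInstr, idxLabel, enmFixupType);      // remember the patch site
 *     ...emit the code in between...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);                            // target offset now known
 *
 * A later pass resolves every recorded fixup against the defined label offset.
 */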
3474
3475/**
3476 * Slow code path for iemNativeInstrBufEnsure.
3477 */
3478DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3479{
3480 /* Double the buffer size till we meet the request. */
3481 uint32_t cNew = pReNative->cInstrBufAlloc;
3482 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3483 do
3484 cNew *= 2;
3485 while (cNew < off + cInstrReq);
3486
3487 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3488#ifdef RT_ARCH_ARM64
3489 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3490#else
3491 uint32_t const cbMaxInstrBuf = _2M;
3492#endif
3493 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3494
3495 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3496 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3497
3498#ifdef VBOX_STRICT
3499 pReNative->offInstrBufChecked = off + cInstrReq;
3500#endif
3501 pReNative->cInstrBufAlloc = cNew;
3502 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3503}
3504
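/*
 * Editor's note (worked example with assumed numbers): if the buffer currently
 * holds 16384 instruction units and a request needs off + cInstrReq = 40000,
 * the doubling loop above goes 16384 -> 32768 -> 65536 and reallocates to
 * 65536 units, subject to the 1 MB (ARM64) / 2 MB byte cap checked right after.
 */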
3505#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3506
3507/**
3508 * Grows the static debug info array used during recompilation.
3509 *
3510 * @returns Pointer to the new debug info block; throws VBox status code on
3511 * failure, so no need to check the return value.
3512 */
3513DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3514{
3515 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3516 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3517 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3518 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3519 pReNative->pDbgInfo = pDbgInfo;
3520 pReNative->cDbgInfoAlloc = cNew;
3521 return pDbgInfo;
3522}
3523
3524
3525/**
3526 * Adds a new debug info uninitialized entry, returning the pointer to it.
3527 */
3528DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3529{
3530 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3531 { /* likely */ }
3532 else
3533 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3534 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3535}
3536
3537
3538/**
3539 * Debug Info: Adds a native offset record, if necessary.
3540 */
3541DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3542{
3543 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3544
3545 /*
3546 * Search backwards to see if we've got a similar record already.
3547 */
3548 uint32_t idx = pDbgInfo->cEntries;
3549 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3550 while (idx-- > idxStop)
3551 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3552 {
3553 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3554 return;
3555 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3556 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3557 break;
3558 }
3559
3560 /*
3561 * Add it.
3562 */
3563 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3564 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3565 pEntry->NativeOffset.offNative = off;
3566}
3567
3568
3569/**
3570 * Debug Info: Record info about a label.
3571 */
3572static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3573{
3574 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3575 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3576 pEntry->Label.uUnused = 0;
3577 pEntry->Label.enmLabel = (uint8_t)enmType;
3578 pEntry->Label.uData = uData;
3579}
3580
3581
3582/**
3583 * Debug Info: Record info about a threaded call.
3584 */
3585static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3586{
3587 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3588 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3589 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3590 pEntry->ThreadedCall.uUnused = 0;
3591 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3592}
3593
3594
3595/**
3596 * Debug Info: Record info about a new guest instruction.
3597 */
3598static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3599{
3600 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3601 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3602 pEntry->GuestInstruction.uUnused = 0;
3603 pEntry->GuestInstruction.fExec = fExec;
3604}
3605
3606
3607/**
3608 * Debug Info: Record info about guest register shadowing.
3609 */
3610DECL_HIDDEN_THROW(void)
3611iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3612 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3613{
3614 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3615 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3616 pEntry->GuestRegShadowing.uUnused = 0;
3617 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3618 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3619 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3620}
3621
3622
3623# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3624/**
3625 * Debug Info: Record info about guest SIMD register shadowing.
3626 */
3627DECL_HIDDEN_THROW(void)
3628iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3629 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3630{
3631 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3632 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3633 pEntry->GuestSimdRegShadowing.uUnused = 0;
3634 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3635 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3636 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3637}
3638# endif
3639
3640
3641# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3642/**
3643 * Debug Info: Record info about delayed RIP updates.
3644 */
3645DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3646{
3647 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3648 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3649 pEntry->DelayedPcUpdate.offPc = offPc;
3650 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3651}
3652# endif
3653
3654#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3655
3656
3657/*********************************************************************************************************************************
3658* Register Allocator *
3659*********************************************************************************************************************************/
3660
3661/**
3662 * Register parameter indexes (indexed by argument number).
3663 */
3664DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3665{
3666 IEMNATIVE_CALL_ARG0_GREG,
3667 IEMNATIVE_CALL_ARG1_GREG,
3668 IEMNATIVE_CALL_ARG2_GREG,
3669 IEMNATIVE_CALL_ARG3_GREG,
3670#if defined(IEMNATIVE_CALL_ARG4_GREG)
3671 IEMNATIVE_CALL_ARG4_GREG,
3672# if defined(IEMNATIVE_CALL_ARG5_GREG)
3673 IEMNATIVE_CALL_ARG5_GREG,
3674# if defined(IEMNATIVE_CALL_ARG6_GREG)
3675 IEMNATIVE_CALL_ARG6_GREG,
3676# if defined(IEMNATIVE_CALL_ARG7_GREG)
3677 IEMNATIVE_CALL_ARG7_GREG,
3678# endif
3679# endif
3680# endif
3681#endif
3682};
3683AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3684
3685/**
3686 * Call register masks indexed by argument count.
3687 */
3688DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3689{
3690 0,
3691 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3692 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3693 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3694 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3695 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3696#if defined(IEMNATIVE_CALL_ARG4_GREG)
3697 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3698 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3699# if defined(IEMNATIVE_CALL_ARG5_GREG)
3700 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3701 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3702# if defined(IEMNATIVE_CALL_ARG6_GREG)
3703 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3704 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3705 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3706# if defined(IEMNATIVE_CALL_ARG7_GREG)
3707 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3708 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3709 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3710# endif
3711# endif
3712# endif
3713#endif
3714};
3715
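/*
 * Editor's sketch (illustrative only): the two tables above let the call
 * emitters stay host-ABI agnostic, along the lines of:
 *
 *     uint8_t  const idxArgReg = g_aidxIemNativeCallRegs[iArg];   // host GPR carrying argument #iArg
 *     uint32_t const fArgRegs  = g_afIemNativeCallRegs[cArgs];    // all argument GPRs a cArgs-argument call uses
 *
 * where iArg must be below IEMNATIVE_CALL_ARG_GREG_COUNT; any further
 * arguments go onto the stack (see g_aoffIemNativeCallStackArgBpDisp below,
 * where available).
 */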
3716#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3717/**
3718 * BP offset of the stack argument slots.
3719 *
3720 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3721 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3722 */
3723DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3724{
3725 IEMNATIVE_FP_OFF_STACK_ARG0,
3726# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3727 IEMNATIVE_FP_OFF_STACK_ARG1,
3728# endif
3729# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3730 IEMNATIVE_FP_OFF_STACK_ARG2,
3731# endif
3732# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3733 IEMNATIVE_FP_OFF_STACK_ARG3,
3734# endif
3735};
3736AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3737#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3738
3739/**
3740 * Info about shadowed guest register values.
3741 * @see IEMNATIVEGSTREG
3742 */
3743DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3744{
3745#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3746 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3747 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3748 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3749 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3750 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3751 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3752 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3753 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3754 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3755 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3756 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3757 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3758 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3759 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3760 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3761 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3762 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3763 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3764 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3765 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3766 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3767 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3768 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3769 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3770 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3771 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3772 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3773 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3774 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3775 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3776 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3777 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3778 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3779 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3780 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3781 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3782 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3783 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3784 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3785 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3786 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3787 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3788 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3789 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3790 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3791 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3792 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3793 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3794#undef CPUMCTX_OFF_AND_SIZE
3795};
3796AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3797
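/*
 * Editor's note (illustrative): g_aGstShadowInfo is what turns an
 * IEMNATIVEGSTREG value into a CPUMCTX access; the first initializer of each
 * entry is the byte offset into VMCPU, the second the value size.  Only the
 * .cb and .pszName members are referenced in this part of the file, e.g.:
 *
 *     Assert(g_aGstShadowInfo[enmGstReg].cb != 0);                     // size in bytes of the shadowed value
 *     Log12(("guest reg %s\n", g_aGstShadowInfo[enmGstReg].pszName));  // name used in the Log12 statements below
 */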
3798
3799/** Host CPU general purpose register names. */
3800DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3801{
3802#ifdef RT_ARCH_AMD64
3803 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3804#elif defined(RT_ARCH_ARM64)
3805 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3806 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3807#else
3808# error "port me"
3809#endif
3810};
3811
3812
3813#if 0 /* unused */
3814/**
3815 * Tries to locate a suitable register in the given register mask.
3816 *
3817 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3818 * failed.
3819 *
3820 * @returns Host register number on success, returns UINT8_MAX on failure.
3821 */
3822static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3823{
3824 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3825 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3826 if (fRegs)
3827 {
3828 /** @todo pick better here: */
3829 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3830
3831 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3832 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3833 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3834 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3835
3836 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3837 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3838 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3839 return idxReg;
3840 }
3841 return UINT8_MAX;
3842}
3843#endif /* unused */
3844
3845
3846/**
3847 * Locate a register, possibly freeing one up.
3848 *
3849 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3850 * failed.
3851 *
3852 * @returns Host register number on success. Returns UINT8_MAX if no registers
3853 *          are found; the caller is supposed to deal with this and raise an
3854 *          allocation type specific status code (if desired).
3855 *
3856 * @throws  VBox status code if we run into trouble spilling a variable or
3857 *          recording debug info. Does NOT throw anything if we're out of
3858 * registers, though.
3859 */
3860static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3861 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3862{
3863 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3864 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3865 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3866
3867 /*
3868 * Try a freed register that's shadowing a guest register.
3869 */
3870 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3871 if (fRegs)
3872 {
3873 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3874
3875#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3876 /*
3877         * When we have liveness information, we use it to kick out all shadowed
3878         * guest registers that will not be needed any more in this TB.  If we're
3879 * lucky, this may prevent us from ending up here again.
3880 *
3881 * Note! We must consider the previous entry here so we don't free
3882 * anything that the current threaded function requires (current
3883 * entry is produced by the next threaded function).
3884 */
3885 uint32_t const idxCurCall = pReNative->idxCurCall;
3886 if (idxCurCall > 0)
3887 {
3888 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3889
3890# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3891 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3892 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3893            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or the XCPT_OR_CALL state */
3894# else
3895 /* Construct a mask of the registers not in the read or write state.
3896               Note! We could skip writes, if they aren't from us, as this is just
3897 a hack to prevent trashing registers that have just been written
3898 or will be written when we retire the current instruction. */
3899 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3900 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3901 & IEMLIVENESSBIT_MASK;
3902# endif
3903 /* Merge EFLAGS. */
3904 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3905 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3906 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3907 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3908 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3909
3910 /* If it matches any shadowed registers. */
3911 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3912 {
3913 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3914 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3915 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3916
3917 /* See if we've got any unshadowed registers we can return now. */
3918 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3919 if (fUnshadowedRegs)
3920 {
3921 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3922 return (fPreferVolatile
3923 ? ASMBitFirstSetU32(fUnshadowedRegs)
3924 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3925 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3926 - 1;
3927 }
3928 }
3929 }
3930#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3931
3932 unsigned const idxReg = (fPreferVolatile
3933 ? ASMBitFirstSetU32(fRegs)
3934 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3935 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3936 - 1;
3937
3938 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3939 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3940 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3941 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3942
3943 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3944 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3945 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3946 return idxReg;
3947 }
3948
3949 /*
3950 * Try free up a variable that's in a register.
3951 *
3952     * We do two rounds here, first evacuating variables that don't need to be
3953     * saved on the stack, then in the second round moving things to the stack.
3954 */
3955 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3956 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3957 {
3958 uint32_t fVars = pReNative->Core.bmVars;
3959 while (fVars)
3960 {
3961 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3962 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3963 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3964 && (RT_BIT_32(idxReg) & fRegMask)
3965 && ( iLoop == 0
3966 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3967 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3968 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3969 {
3970 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3971 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3972 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3973 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3974 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3975 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3976
3977 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3978 {
3979 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3980 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3981 }
3982
3983 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3984 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3985
3986 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3987 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3988 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3989 return idxReg;
3990 }
3991 fVars &= ~RT_BIT_32(idxVar);
3992 }
3993 }
3994
3995 return UINT8_MAX;
3996}
3997
3998
3999/**
4000 * Reassigns a variable to a different register specified by the caller.
4001 *
4002 * @returns The new code buffer position.
4003 * @param pReNative The native recompile state.
4004 * @param off The current code buffer position.
4005 * @param idxVar The variable index.
4006 * @param idxRegOld The old host register number.
4007 * @param idxRegNew The new host register number.
4008 * @param pszCaller The caller for logging.
4009 */
4010static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4011 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4012{
4013 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4014 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4015#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4016 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4017#endif
4018 RT_NOREF(pszCaller);
4019
4020 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4021
4022 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4023 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4024 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4025 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4026
4027 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4028 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4029 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4030 if (fGstRegShadows)
4031 {
4032 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4033 | RT_BIT_32(idxRegNew);
4034 while (fGstRegShadows)
4035 {
4036 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4037 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4038
4039 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4040 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4041 }
4042 }
4043
4044 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4045 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4046 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4047 return off;
4048}
4049
4050
4051/**
4052 * Moves a variable to a different register or spills it onto the stack.
4053 *
4054 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4055 * kinds can easily be recreated if needed later.
4056 *
4057 * @returns The new code buffer position.
4058 * @param pReNative The native recompile state.
4059 * @param off The current code buffer position.
4060 * @param idxVar The variable index.
4061 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4062 * call-volatile registers.
4063 */
4064DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4065 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4066{
4067 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4068 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4069 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4070 Assert(!pVar->fRegAcquired);
4071
4072 uint8_t const idxRegOld = pVar->idxReg;
4073 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4074 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4075 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4076 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4077 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4078 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4079 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4080 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4081
4082
4083 /** @todo Add statistics on this.*/
4084 /** @todo Implement basic variable liveness analysis (python) so variables
4085     * can be freed immediately once no longer used.  Without this we risk
4086     * trashing registers and stack slots for dead variables.
4087 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4088
4089 /*
4090 * First try move it to a different register, as that's cheaper.
4091 */
4092 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4093 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4094 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4095 if (fRegs)
4096 {
4097 /* Avoid using shadow registers, if possible. */
4098 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4099 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4100 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4101 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4102 }
4103
4104 /*
4105 * Otherwise we must spill the register onto the stack.
4106 */
4107 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4108 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4109 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4110 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4111
4112 pVar->idxReg = UINT8_MAX;
4113 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4114 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4115 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4116 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4117 return off;
4118}
4119
4120
4121/**
4122 * Allocates a temporary host general purpose register.
4123 *
4124 * This may emit code to save register content onto the stack in order to free
4125 * up a register.
4126 *
4127 * @returns The host register number; throws VBox status code on failure,
4128 * so no need to check the return value.
4129 * @param pReNative The native recompile state.
4130 * @param poff Pointer to the variable with the code buffer position.
4131 *                          This will be updated if we need to move a variable from
4132 * register to stack in order to satisfy the request.
4133 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4134 * registers (@c true, default) or the other way around
4135 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4136 */
4137DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4138{
4139 /*
4140 * Try find a completely unused register, preferably a call-volatile one.
4141 */
4142 uint8_t idxReg;
4143 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4144 & ~pReNative->Core.bmHstRegsWithGstShadow
4145 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4146 if (fRegs)
4147 {
4148 if (fPreferVolatile)
4149 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4150 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4151 else
4152 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4153 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4154 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4155 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4156 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4157 }
4158 else
4159 {
4160 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4161 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4162 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4163 }
4164 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4165}
4166
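/*
 * Editor's sketch of typical temporary register usage (illustrative; the
 * matching free helper is defined elsewhere in this file and uSomeValue is a
 * placeholder):
 *
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *     off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, uSomeValue);
 *     ...use idxTmpReg as scratch...
 *     // release idxTmpReg again with the corresponding iemNativeRegFree* helper
 */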
4167
4168/**
4169 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4170 * registers.
4171 *
4172 * @returns The host register number; throws VBox status code on failure,
4173 * so no need to check the return value.
4174 * @param pReNative The native recompile state.
4175 * @param poff Pointer to the variable with the code buffer position.
4176 *                          This will be updated if we need to move a variable from
4177 * register to stack in order to satisfy the request.
4178 * @param fRegMask Mask of acceptable registers.
4179 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4180 * registers (@c true, default) or the other way around
4181 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4182 */
4183DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4184 bool fPreferVolatile /*= true*/)
4185{
4186 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4187 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4188
4189 /*
4190 * Try find a completely unused register, preferably a call-volatile one.
4191 */
4192 uint8_t idxReg;
4193 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4194 & ~pReNative->Core.bmHstRegsWithGstShadow
4195 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4196 & fRegMask;
4197 if (fRegs)
4198 {
4199 if (fPreferVolatile)
4200 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4201 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4202 else
4203 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4204 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4205 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4206 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4207 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4208 }
4209 else
4210 {
4211 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4212 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4213 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4214 }
4215 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4216}
4217
4218
4219/**
4220 * Allocates a temporary register for loading an immediate value into.
4221 *
4222 * This will emit code to load the immediate, unless there happens to be an
4223 * unused register with the value already loaded.
4224 *
4225 * The caller will not modify the returned register, it must be considered
4226 * read-only. Free using iemNativeRegFreeTmpImm.
4227 *
4228 * @returns The host register number; throws VBox status code on failure, so no
4229 * need to check the return value.
4230 * @param pReNative The native recompile state.
4231 * @param poff Pointer to the variable with the code buffer position.
4232 * @param uImm The immediate value that the register must hold upon
4233 * return.
4234 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4235 * registers (@c true, default) or the other way around
4236 * (@c false).
4237 *
4238 * @note Reusing immediate values has not been implemented yet.
4239 */
4240DECL_HIDDEN_THROW(uint8_t)
4241iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4242{
4243 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4244 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4245 return idxReg;
4246}
4247
4248
4249/**
4250 * Allocates a temporary host general purpose register for keeping a guest
4251 * register value.
4252 *
4253 * Since we may already have a register holding the guest register value,
4254 * code will be emitted to do the loading if that's not the case. Code may also
4255 * be emitted if we have to free up a register to satisfy the request.
4256 *
4257 * @returns The host register number; throws VBox status code on failure, so no
4258 * need to check the return value.
4259 * @param pReNative The native recompile state.
4260 * @param poff Pointer to the variable with the code buffer
4261 *                          position. This will be updated if we need to move a
4262 * variable from register to stack in order to satisfy
4263 * the request.
4264 * @param   enmGstReg       The guest register that is to be updated.
4265 * @param enmIntendedUse How the caller will be using the host register.
4266 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4267 * register is okay (default). The ASSUMPTION here is
4268 * that the caller has already flushed all volatile
4269 * registers, so this is only applied if we allocate a
4270 * new register.
4271 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4272 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4273 */
4274DECL_HIDDEN_THROW(uint8_t)
4275iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4276 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4277 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4278{
4279 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4280#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4281 AssertMsg( fSkipLivenessAssert
4282 || pReNative->idxCurCall == 0
4283 || enmGstReg == kIemNativeGstReg_Pc
4284 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4285 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4286 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4287 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4288 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4289 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4290#endif
4291 RT_NOREF(fSkipLivenessAssert);
4292#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4293 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4294#endif
4295 uint32_t const fRegMask = !fNoVolatileRegs
4296 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4297 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4298
4299 /*
4300 * First check if the guest register value is already in a host register.
4301 */
4302 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4303 {
4304 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4305 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4306 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4307 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4308
4309 /* It's not supposed to be allocated... */
4310 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4311 {
4312 /*
4313 * If the register will trash the guest shadow copy, try find a
4314 * completely unused register we can use instead. If that fails,
4315 * we need to disassociate the host reg from the guest reg.
4316 */
4317 /** @todo would be nice to know if preserving the register is in any way helpful. */
4318 /* If the purpose is calculations, try duplicate the register value as
4319 we'll be clobbering the shadow. */
4320 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4321 && ( ~pReNative->Core.bmHstRegs
4322 & ~pReNative->Core.bmHstRegsWithGstShadow
4323 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4324 {
4325 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4326
4327 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4328
4329 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4330 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4331 g_apszIemNativeHstRegNames[idxRegNew]));
4332 idxReg = idxRegNew;
4333 }
4334 /* If the current register matches the restrictions, go ahead and allocate
4335 it for the caller. */
4336 else if (fRegMask & RT_BIT_32(idxReg))
4337 {
4338 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4339 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4340 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4341 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4342 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4343 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4344 else
4345 {
4346 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4347 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4348 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4349 }
4350 }
4351 /* Otherwise, allocate a register that satisfies the caller and transfer
4352 the shadowing if compatible with the intended use. (This basically
4353               means the caller wants a non-volatile register (RSP push/pop scenario).)
4354 else
4355 {
4356 Assert(fNoVolatileRegs);
4357 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4358 !fNoVolatileRegs
4359 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4360 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4361 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4362 {
4363 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4364                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4365 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4366 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4367 }
4368 else
4369 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4370 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4371 g_apszIemNativeHstRegNames[idxRegNew]));
4372 idxReg = idxRegNew;
4373 }
4374 }
4375 else
4376 {
4377 /*
4378 * Oops. Shadowed guest register already allocated!
4379 *
4380             * Allocate a new register, copy the value, and, if updating, move the
4381             * guest shadow copy assignment over to the new register.
4382 */
4383 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4384 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4385 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4386 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4387
4388 /** @todo share register for readonly access. */
4389 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4390 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4391
4392 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4393 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4394
4395 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4396 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4397 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4398 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4399 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4400 else
4401 {
4402 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4403 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4404 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4405 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4406 }
4407 idxReg = idxRegNew;
4408 }
4409 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4410
4411#ifdef VBOX_STRICT
4412 /* Strict builds: Check that the value is correct. */
4413 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4414#endif
4415
4416 return idxReg;
4417 }
4418
4419 /*
4420     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4421 */
4422 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4423
4424 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4425 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4426
4427 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4428 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4429 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4430 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4431
4432 return idxRegNew;
4433}
4434
4435
4436/**
4437 * Allocates a temporary host general purpose register that already holds the
4438 * given guest register value.
4439 *
4440 * The use case for this function is places where the shadowing state cannot be
4441 * modified due to branching and such. This will fail if we don't have a
4442 * current shadow copy handy or if it's incompatible. The only code that will
4443 * be emitted here is value checking code in strict builds.
4444 *
4445 * The intended use can only be readonly!
4446 *
4447 * @returns The host register number, UINT8_MAX if not present.
4448 * @param pReNative The native recompile state.
4449 * @param poff Pointer to the instruction buffer offset.
4450 * Will be updated in strict builds if a register is
4451 * found.
4452 * @param enmGstReg The guest register that is to be updated.
4453 * @note In strict builds, this may throw instruction buffer growth failures.
4454 * Non-strict builds will not throw anything.
4455 * @sa iemNativeRegAllocTmpForGuestReg
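 *
 * Illustrative usage sketch (hypothetical call site; kIemNativeGstReg_Pc is just
 * an example input, real callers pick whatever guest register they need):
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxTmpReg != UINT8_MAX)
 *      {
 *          ... emit code reading the guest value from idxTmpReg (read-only!) ...
 *          iemNativeRegFreeTmp(pReNative, idxTmpReg);
 *      }
 *      else
 *          ... take a fallback path that doesn't rely on an existing shadow copy ...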
4456 */
4457DECL_HIDDEN_THROW(uint8_t)
4458iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4459{
4460 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4461#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4462 AssertMsg( pReNative->idxCurCall == 0
4463 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4464 || enmGstReg == kIemNativeGstReg_Pc,
4465 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4466#endif
4467
4468 /*
4469 * First check if the guest register value is already in a host register.
4470 */
4471 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4472 {
4473 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4474 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4475 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4476 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4477
4478 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4479 {
4480 /*
4481 * We only do readonly use here, so easy compared to the other
4482 * variant of this code.
4483 */
4484 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4485 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4486 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4487 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4488 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4489
4490#ifdef VBOX_STRICT
4491 /* Strict builds: Check that the value is correct. */
4492 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4493#else
4494 RT_NOREF(poff);
4495#endif
4496 return idxReg;
4497 }
4498 }
4499
4500 return UINT8_MAX;
4501}
4502
4503
4504/**
4505 * Allocates argument registers for a function call.
4506 *
4507 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4508 * need to check the return value.
4509 * @param pReNative The native recompile state.
4510 * @param off The current code buffer offset.
4511 * @param cArgs The number of arguments the function call takes.
4512 */
4513DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4514{
4515 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4516 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4517 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4518 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4519
4520 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4521 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4522 else if (cArgs == 0)
4523        return off;
4524
4525 /*
4526     * Are we lucky and all the registers are free and not shadowing anything?
4527 */
4528 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4529 for (uint32_t i = 0; i < cArgs; i++)
4530 {
4531 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4532 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4533 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4534 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4535 }
4536 /*
4537 * Okay, not lucky so we have to free up the registers.
4538 */
4539 else
4540 for (uint32_t i = 0; i < cArgs; i++)
4541 {
4542 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4543 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4544 {
4545 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4546 {
4547 case kIemNativeWhat_Var:
4548 {
4549 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4550 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4551 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4552 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4553 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4554#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4555 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4556#endif
4557
4558 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4559 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4560 else
4561 {
4562 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4563 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4564 }
4565 break;
4566 }
4567
4568 case kIemNativeWhat_Tmp:
4569 case kIemNativeWhat_Arg:
4570 case kIemNativeWhat_rc:
4571 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4572 default:
4573 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4574 }
4575
4576 }
4577 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4578 {
4579 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4580 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4581 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4582 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4583 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4584 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4585 }
4586 else
4587 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4588 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4589 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4590 }
4591 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4592    return off;
4593}
4594
4595
4596DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4597
4598
4599#if 0
4600/**
4601 * Frees a register assignment of any type.
4602 *
4603 * @param pReNative The native recompile state.
4604 * @param idxHstReg The register to free.
4605 *
4606 * @note Does not update variables.
4607 */
4608DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4609{
4610 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4611 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4612 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4613 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4614 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4615 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4616 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4617 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4618 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4619 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4620 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4621 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4622 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4623 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4624
4625 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4626 /* no flushing, right:
4627 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4628 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4629 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4630 */
4631}
4632#endif
4633
4634
4635/**
4636 * Frees a temporary register.
4637 *
4638 * Any shadow copies of guest registers assigned to the host register will not
4639 * be flushed by this operation.
4640 */
4641DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4642{
4643 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4644 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4645 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4646 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4647 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4648}
4649
4650
4651/**
4652 * Frees a temporary immediate register.
4653 *
4654 * It is assumed that the call has not modified the register, so it still holds
4655 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4656 */
4657DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4658{
4659 iemNativeRegFreeTmp(pReNative, idxHstReg);
4660}
4661
4662
4663/**
4664 * Frees a register assigned to a variable.
4665 *
4666 * The register will be disassociated from the variable.
4667 */
4668DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4669{
4670 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4671 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4672 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4673 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4674 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4675#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4676 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4677#endif
4678
4679 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4680 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4681 if (!fFlushShadows)
4682 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4683 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4684 else
4685 {
4686 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4687 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4688 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4689 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4690 uint64_t fGstRegShadows = fGstRegShadowsOld;
4691 while (fGstRegShadows)
4692 {
4693 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4694 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4695
4696 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4697 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4698 }
4699 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4700 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4701 }
4702}
4703
4704
4705#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4706# ifdef LOG_ENABLED
4707/** Host CPU SIMD register names. */
4708DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4709{
4710# ifdef RT_ARCH_AMD64
4711 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4712# elif RT_ARCH_ARM64
4713 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4714 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4715# else
4716# error "port me"
4717# endif
4718};
4719# endif
4720
4721
4722/**
4723 * Frees a SIMD register assigned to a variable.
4724 *
4725 * The register will be disassociated from the variable.
4726 */
4727DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4728{
4729 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4730 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4731 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4732 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4733 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4734 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4735
4736 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4737 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4738 if (!fFlushShadows)
4739 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4740 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4741 else
4742 {
4743 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4744 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4745 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4746 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4747 uint64_t fGstRegShadows = fGstRegShadowsOld;
4748 while (fGstRegShadows)
4749 {
4750 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4751 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4752
4753 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4754 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4755 }
4756 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4757 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4758 }
4759}
4760#endif
4761
4762
4763/**
4764 * Called right before emitting a call instruction to move anything important
4765 * out of call-volatile registers, free and flush the call-volatile registers,
4766 * optionally freeing argument variables.
4767 *
4768 * @returns New code buffer offset, UINT32_MAX on failure.
4769 * @param pReNative The native recompile state.
4770 * @param off The code buffer offset.
4771 * @param cArgs The number of arguments the function call takes.
4772 * It is presumed that the host register part of these has
4773 * already been allocated as such and won't need moving,
4774 * just freeing.
4775 * @param fKeepVars Mask of variables that should keep their register
4776 * assignments. Caller must take care to handle these.
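 *
 * Illustrative call-emission sketch (hypothetical ordering; the actual helper
 * call and argument loading are done by the regular emitters):
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
 *      ... load the argument registers and emit the call ...
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /* drop now-stale shadows */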
4777 */
4778DECL_HIDDEN_THROW(uint32_t)
4779iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4780{
4781 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4782
4783 /* fKeepVars will reduce this mask. */
4784 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4785
4786 /*
4787 * Move anything important out of volatile registers.
4788 */
4789 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4790 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4791 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4792#ifdef IEMNATIVE_REG_FIXED_TMP0
4793 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4794#endif
4795#ifdef IEMNATIVE_REG_FIXED_TMP1
4796 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4797#endif
4798#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4799 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4800#endif
4801 & ~g_afIemNativeCallRegs[cArgs];
4802
4803 fRegsToMove &= pReNative->Core.bmHstRegs;
4804 if (!fRegsToMove)
4805 { /* likely */ }
4806 else
4807 {
4808 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4809 while (fRegsToMove != 0)
4810 {
4811 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4812 fRegsToMove &= ~RT_BIT_32(idxReg);
4813
4814 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4815 {
4816 case kIemNativeWhat_Var:
4817 {
4818 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4819 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4820 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4821 Assert(pVar->idxReg == idxReg);
4822 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4823 {
4824 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4825 idxVar, pVar->enmKind, pVar->idxReg));
4826 if (pVar->enmKind != kIemNativeVarKind_Stack)
4827 pVar->idxReg = UINT8_MAX;
4828 else
4829 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4830 }
4831 else
4832 fRegsToFree &= ~RT_BIT_32(idxReg);
4833 continue;
4834 }
4835
4836 case kIemNativeWhat_Arg:
4837 AssertMsgFailed(("What?!?: %u\n", idxReg));
4838 continue;
4839
4840 case kIemNativeWhat_rc:
4841 case kIemNativeWhat_Tmp:
4842 AssertMsgFailed(("Missing free: %u\n", idxReg));
4843 continue;
4844
4845 case kIemNativeWhat_FixedTmp:
4846 case kIemNativeWhat_pVCpuFixed:
4847 case kIemNativeWhat_pCtxFixed:
4848 case kIemNativeWhat_PcShadow:
4849 case kIemNativeWhat_FixedReserved:
4850 case kIemNativeWhat_Invalid:
4851 case kIemNativeWhat_End:
4852 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4853 }
4854 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4855 }
4856 }
4857
4858 /*
4859 * Do the actual freeing.
4860 */
4861 if (pReNative->Core.bmHstRegs & fRegsToFree)
4862 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4863 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4864 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4865
4866 /* If there are guest register shadows in any call-volatile register, we
4867        have to clear the corresponding guest register masks for each register. */
4868 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4869 if (fHstRegsWithGstShadow)
4870 {
4871 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4872 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4873 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4874 do
4875 {
4876 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4877 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4878
4879 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4880 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4881 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4882 } while (fHstRegsWithGstShadow != 0);
4883 }
4884
4885 return off;
4886}
4887
4888
4889/**
4890 * Flushes a set of guest register shadow copies.
4891 *
4892 * This is usually done after calling a threaded function or a C-implementation
4893 * of an instruction.
4894 *
4895 * @param pReNative The native recompile state.
4896 * @param fGstRegs Set of guest registers to flush.
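 *
 * Illustrative call (hypothetical; callers typically pass the mask of guest
 * registers the just-called code may have modified):
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));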
4897 */
4898DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4899{
4900 /*
4901 * Reduce the mask by what's currently shadowed
4902 */
4903 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4904 fGstRegs &= bmGstRegShadowsOld;
4905 if (fGstRegs)
4906 {
4907 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4908 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4909 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4910 if (bmGstRegShadowsNew)
4911 {
4912 /*
4913 * Partial.
4914 */
4915 do
4916 {
4917 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4918 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4919 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4920 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4921 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4922
4923 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4924 fGstRegs &= ~fInThisHstReg;
4925 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4926 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4927 if (!fGstRegShadowsNew)
4928 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4929 } while (fGstRegs != 0);
4930 }
4931 else
4932 {
4933 /*
4934 * Clear all.
4935 */
4936 do
4937 {
4938 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4939 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4940 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4941 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4942 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4943
4944 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4945 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4946 } while (fGstRegs != 0);
4947 pReNative->Core.bmHstRegsWithGstShadow = 0;
4948 }
4949 }
4950}
4951
4952
4953/**
4954 * Flushes guest register shadow copies held by a set of host registers.
4955 *
4956 * This is used with the TLB lookup code for ensuring that we don't carry on
4957 * with any guest shadows in volatile registers, as these will get corrupted by
4958 * a TLB miss.
4959 *
4960 * @param pReNative The native recompile state.
4961 * @param fHstRegs Set of host registers to flush guest shadows for.
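 *
 * Illustrative call (hypothetical; the TLB lookup emitter passes its own mask):
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);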
4962 */
4963DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4964{
4965 /*
4966 * Reduce the mask by what's currently shadowed.
4967 */
4968 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4969 fHstRegs &= bmHstRegsWithGstShadowOld;
4970 if (fHstRegs)
4971 {
4972 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4973 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4974 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4975 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4976 if (bmHstRegsWithGstShadowNew)
4977 {
4978 /*
4979 * Partial (likely).
4980 */
4981 uint64_t fGstShadows = 0;
4982 do
4983 {
4984 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4985 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4986 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4987 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4988
4989 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4990 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4991 fHstRegs &= ~RT_BIT_32(idxHstReg);
4992 } while (fHstRegs != 0);
4993 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4994 }
4995 else
4996 {
4997 /*
4998 * Clear all.
4999 */
5000 do
5001 {
5002 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5003 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5004 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5005 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5006
5007 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5008 fHstRegs &= ~RT_BIT_32(idxHstReg);
5009 } while (fHstRegs != 0);
5010 pReNative->Core.bmGstRegShadows = 0;
5011 }
5012 }
5013}
5014
5015
5016/**
5017 * Restores guest shadow copies in volatile registers.
5018 *
5019 * This is used after calling a helper function (think TLB miss) to restore the
5020 * register state of volatile registers.
5021 *
5022 * @param pReNative The native recompile state.
5023 * @param off The code buffer offset.
5024 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5025 * be active (allocated) w/o asserting. Hack.
5026 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5027 * iemNativeVarRestoreVolatileRegsPostHlpCall()
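 *
 * Illustrative TLB-miss recovery sketch (hypothetical ordering):
 *      ... emit the helper call; call-volatile registers are now trashed ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);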
5028 */
5029DECL_HIDDEN_THROW(uint32_t)
5030iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5031{
5032 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5033 if (fHstRegs)
5034 {
5035 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5036 do
5037 {
5038 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5039
5040             /* It's not fatal if a register is active holding a variable that is
5041                shadowing a guest register, ASSUMING all pending guest register
5042                writes were flushed prior to the helper call. However, we'll be
5043                emitting duplicate restores, so it wastes code space. */
5044 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5045 RT_NOREF(fHstRegsActiveShadows);
5046
5047 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5048 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5049 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5050 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5051
5052 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5053 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5054
5055 fHstRegs &= ~RT_BIT_32(idxHstReg);
5056 } while (fHstRegs != 0);
5057 }
5058 return off;
5059}
5060
5061
5062
5063
5064/*********************************************************************************************************************************
5065* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5066*********************************************************************************************************************************/
5067#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5068
5069/**
5070 * Info about shadowed guest SIMD register values.
5071 * @see IEMNATIVEGSTSIMDREG
5072 */
5073static struct
5074{
5075 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5076 uint32_t offXmm;
5077 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5078 uint32_t offYmm;
5079 /** Name (for logging). */
5080 const char *pszName;
5081} const g_aGstSimdShadowInfo[] =
5082{
5083#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5084 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5085 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5086 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5087 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5088 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5089 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5090 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5091 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5092 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5093 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5094 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5095 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5096 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5097 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5098 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5099 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5100 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5101#undef CPUMCTX_OFF_AND_SIZE
5102};
5103AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5104
5105
5106/**
5107 * Frees a temporary SIMD register.
5108 *
5109 * Any shadow copies of guest registers assigned to the host register will not
5110 * be flushed by this operation.
5111 */
5112DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5113{
5114 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5115 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5116 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5117 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5118 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5119}
5120
5121
5122/**
5123 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5124 *
5125 * @returns New code buffer offset.
5126 * @param pReNative The native recompile state.
5127 * @param off Current code buffer position.
5128 * @param enmGstSimdReg The guest SIMD register to flush.
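 *
 * Illustrative call (hypothetical; IEMNATIVEGSTSIMDREG_SIMD(0), i.e. ymm0, is
 * just an example):
 *      off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));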
5129 */
5130DECL_HIDDEN_THROW(uint32_t)
5131iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5132{
5133 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5134
5135 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5136 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5137 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5138 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5139
5140 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5141 {
5142 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5143 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5144 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5145 }
5146
5147 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5148 {
5149 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5150 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5151 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5152 }
5153
5154 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5155 return off;
5156}
5157
5158
5159/**
5160 * Locate a register, possibly freeing one up.
5161 *
5162 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5163 * failed.
5164 *
5165 * @returns Host register number on success. Returns UINT8_MAX if no register was
5166 *          found; the caller is supposed to deal with this and raise an
5167 *          allocation-type specific status code (if desired).
5168 *
5169 * @throws  VBox status code if we run into trouble spilling a variable or
5170 *          recording debug info. Does NOT throw anything if we're out of
5171 * registers, though.
5172 */
5173static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5174 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5175{
5176 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5177 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5178 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5179
5180 /*
5181 * Try a freed register that's shadowing a guest register.
5182 */
5183 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5184 if (fRegs)
5185 {
5186 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5187
5188#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5189 /*
5190         * When we have liveness information, we use it to kick out all shadowed
5191         * guest registers that will not be needed any more in this TB. If we're
5192 * lucky, this may prevent us from ending up here again.
5193 *
5194 * Note! We must consider the previous entry here so we don't free
5195 * anything that the current threaded function requires (current
5196 * entry is produced by the next threaded function).
5197 */
5198 uint32_t const idxCurCall = pReNative->idxCurCall;
5199 if (idxCurCall > 0)
5200 {
5201 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5202
5203# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5204 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5205 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5206 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5207#else
5208 /* Construct a mask of the registers not in the read or write state.
5209            Note! We could skip writes, if they aren't from us, as this is just
5210 a hack to prevent trashing registers that have just been written
5211 or will be written when we retire the current instruction. */
5212 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5213 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5214 & IEMLIVENESSBIT_MASK;
5215#endif
5216 /* If it matches any shadowed registers. */
5217 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5218 {
5219 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5220 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5221 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5222
5223 /* See if we've got any unshadowed registers we can return now. */
5224 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5225 if (fUnshadowedRegs)
5226 {
5227 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5228 return (fPreferVolatile
5229 ? ASMBitFirstSetU32(fUnshadowedRegs)
5230 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5231 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5232 - 1;
5233 }
5234 }
5235 }
5236#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5237
5238 unsigned const idxReg = (fPreferVolatile
5239 ? ASMBitFirstSetU32(fRegs)
5240 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5241 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5242 - 1;
5243
5244 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5245 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5246 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5247 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5248
5249 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5250 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5251 uint32_t idxGstSimdReg = 0;
5252 do
5253 {
5254 if (fGstRegShadows & 0x1)
5255 {
5256 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5257 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5258 }
5259 idxGstSimdReg++;
5260 fGstRegShadows >>= 1;
5261 } while (fGstRegShadows);
5262
5263 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5264 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5265 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5266 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5267 return idxReg;
5268 }
5269
5270 /*
5271 * Try free up a variable that's in a register.
5272 *
5273     * saved on the stack, then in the second round moving things to the stack.
5274 * saved on the stack, then in the second round move things to the stack.
5275 */
5276 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5277 AssertReleaseFailed(); /** @todo No variable support right now. */
5278#if 0
5279 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5280 {
5281 uint32_t fVars = pReNative->Core.bmSimdVars;
5282 while (fVars)
5283 {
5284 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5285 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5286 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5287 && (RT_BIT_32(idxReg) & fRegMask)
5288 && ( iLoop == 0
5289 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5290 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5291 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5292 {
5293 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5294 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5295 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5296 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5297 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5298 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5299
5300 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5301 {
5302 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5303 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5304 }
5305
5306 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5307 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5308
5309 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5310 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5311 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5312 return idxReg;
5313 }
5314 fVars &= ~RT_BIT_32(idxVar);
5315 }
5316 }
5317#endif
5318
5319 AssertFailed();
5320 return UINT8_MAX;
5321}
5322
5323
5324/**
5325 * Flushes a set of guest register shadow copies.
5326 *
5327 * This is usually done after calling a threaded function or a C-implementation
5328 * of an instruction.
5329 *
5330 * @param pReNative The native recompile state.
5331 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5332 */
5333DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5334{
5335 /*
5336 * Reduce the mask by what's currently shadowed
5337 */
5338 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5339 fGstSimdRegs &= bmGstSimdRegShadows;
5340 if (fGstSimdRegs)
5341 {
5342 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5343 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5344 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5345 if (bmGstSimdRegShadowsNew)
5346 {
5347 /*
5348 * Partial.
5349 */
5350 do
5351 {
5352 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5353 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5354 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5355 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5356 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5357 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5358
5359 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5360 fGstSimdRegs &= ~fInThisHstReg;
5361 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5362 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5363 if (!fGstRegShadowsNew)
5364 {
5365 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5366 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5367 }
5368 } while (fGstSimdRegs != 0);
5369 }
5370 else
5371 {
5372 /*
5373 * Clear all.
5374 */
5375 do
5376 {
5377 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5378 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5379 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5380 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5381 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5382 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5383
5384 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5385 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5386 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5387 } while (fGstSimdRegs != 0);
5388 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5389 }
5390 }
5391}
5392
5393
5394/**
5395 * Allocates a temporary host SIMD register.
5396 *
5397 * This may emit code to save register content onto the stack in order to free
5398 * up a register.
5399 *
5400 * @returns The host register number; throws VBox status code on failure,
5401 * so no need to check the return value.
5402 * @param pReNative The native recompile state.
5403 * @param poff Pointer to the variable with the code buffer position.
5404 * This will be update if we need to move a variable from
5405 *                      This will be updated if we need to move a variable from
5406 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5407 * registers (@c true, default) or the other way around
5408 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
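 *
 * Illustrative usage sketch (hypothetical; real call sites differ):
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      ... emit code using idxSimdTmp as scratch ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);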
5409 */
5410DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5411{
5412 /*
5413 * Try find a completely unused register, preferably a call-volatile one.
5414 */
5415 uint8_t idxSimdReg;
5416 uint32_t fRegs = ~pReNative->Core.bmHstRegs
5417 & ~pReNative->Core.bmHstRegsWithGstShadow
5418 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5419 if (fRegs)
5420 {
5421 if (fPreferVolatile)
5422 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5423 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5424 else
5425 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5426 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5427 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5428 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5429 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5430 }
5431 else
5432 {
5433 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5434 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5435 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5436 }
5437
5438 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5439 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5440}
5441
5442
5443/**
5444 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5445 * registers.
5446 *
5447 * @returns The host register number; throws VBox status code on failure,
5448 * so no need to check the return value.
5449 * @param pReNative The native recompile state.
5450 * @param poff Pointer to the variable with the code buffer position.
5451 *                      This will be updated if we need to move a variable from
5452 * register to stack in order to satisfy the request.
5453 * @param fRegMask Mask of acceptable registers.
5454 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5455 * registers (@c true, default) or the other way around
5456 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5457 */
5458DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5459 bool fPreferVolatile /*= true*/)
5460{
5461 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5462 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5463
5464 /*
5465 * Try find a completely unused register, preferably a call-volatile one.
5466 */
5467 uint8_t idxSimdReg;
5468 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5469 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5470 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5471 & fRegMask;
5472 if (fRegs)
5473 {
5474 if (fPreferVolatile)
5475 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5476 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5477 else
5478 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5479 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5480 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5481 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5482 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5483 }
5484 else
5485 {
5486 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5487 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5488 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5489 }
5490
5491 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5492 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5493}
5494
5495
5496/**
5497 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5498 *
5499 * @param pReNative The native recompile state.
5500 * @param idxHstSimdReg The host SIMD register to update the state for.
5501 * @param enmLoadSz The load size to set.
5502 */
5503DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5504 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5505{
5506 /* Everything valid already? -> nothing to do. */
5507 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5508 return;
5509
5510 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5511 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5512 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5513 {
5514 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5515 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5516 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5517 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5518 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5519 }
5520}
5521
5522
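/**
 * Emits code to copy the (partially) loaded guest SIMD value from one host SIMD
 * register to another, according to the requested destination load size.
 *
 * Only the simple case is handled where the source register already has the
 * needed part(s) loaded (same size as requested, or the full 256 bits); other
 * combinations assert for now. When a copy is emitted, the destination's
 * valid-load indicator is updated via iemNativeSimdRegSetValidLoadFlag().
 *
 * @returns New code buffer offset.
 * @param   pReNative           The native recompile state.
 * @param   off                 The current code buffer position.
 * @param   idxHstSimdRegDst    The destination host SIMD register.
 * @param   idxHstSimdRegSrc    The source host SIMD register.
 * @param   enmLoadSzDst        The load size the destination should end up with.
 */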
5523static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5524 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5525{
5526 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5527 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5528 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5529 {
5530# ifdef RT_ARCH_ARM64
5531 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5532 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5533# endif
5534
5535 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5536 {
5537 switch (enmLoadSzDst)
5538 {
5539 case kIemNativeGstSimdRegLdStSz_256:
5540 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5541 break;
5542 case kIemNativeGstSimdRegLdStSz_Low128:
5543 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5544 break;
5545 case kIemNativeGstSimdRegLdStSz_High128:
5546 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5547 break;
5548 default:
5549 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5550 }
5551
5552 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5553 }
5554 }
5555 else
5556 {
5557 /* Complicated stuff where the source is currently missing something, later. */
5558 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5559 }
5560
5561 return off;
5562}
5563
5564
5565/**
5566 * Allocates a temporary host SIMD register for keeping a guest
5567 * SIMD register value.
5568 *
5569 * Since we may already have a register holding the guest register value,
5570 * code will be emitted to do the loading if that's not the case. Code may also
5571 * be emitted if we have to free up a register to satisfy the request.
5572 *
5573 * @returns The host register number; throws VBox status code on failure, so no
5574 * need to check the return value.
5575 * @param pReNative The native recompile state.
5576 * @param poff Pointer to the variable with the code buffer
5577 *                      position. This will be updated if we need to move a
5578 * variable from register to stack in order to satisfy
5579 * the request.
5580 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5581 * @param enmIntendedUse How the caller will be using the host register.
5582 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5583 * register is okay (default). The ASSUMPTION here is
5584 * that the caller has already flushed all volatile
5585 * registers, so this is only applied if we allocate a
5586 * new register.
5587 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
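 *
 * Illustrative usage sketch (hypothetical; the register and enum values are just
 * examples):
 *      uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                            IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                            kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                            kIemNativeGstRegUse_ReadOnly);
 *      ... emit code reading the low 128 bits of guest ymm1 from idxHstSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxHstSimdReg);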
5588 */
5589DECL_HIDDEN_THROW(uint8_t)
5590iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5591 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5592 bool fNoVolatileRegs /*= false*/)
5593{
5594 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5595#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5596 AssertMsg( pReNative->idxCurCall == 0
5597 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5598 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5599 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5600 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5601 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5602 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5603#endif
5604#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5605 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5606#endif
5607 uint32_t const fRegMask = !fNoVolatileRegs
5608 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5609 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5610
5611 /*
5612 * First check if the guest register value is already in a host register.
5613 */
5614 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5615 {
5616 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5617 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5618 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5619 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5620
5621 /* It's not supposed to be allocated... */
5622 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5623 {
5624 /*
5625 * If the register will trash the guest shadow copy, try find a
5626 * completely unused register we can use instead. If that fails,
5627 * we need to disassociate the host reg from the guest reg.
5628 */
5629 /** @todo would be nice to know if preserving the register is in any way helpful. */
5630             /* If the purpose is calculations, try to duplicate the register value as
5631 we'll be clobbering the shadow. */
5632 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5633 && ( ~pReNative->Core.bmHstSimdRegs
5634 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5635 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5636 {
5637 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5638
5639 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5640
5641 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5642 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5643 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5644 idxSimdReg = idxRegNew;
5645 }
5646 /* If the current register matches the restrictions, go ahead and allocate
5647 it for the caller. */
5648 else if (fRegMask & RT_BIT_32(idxSimdReg))
5649 {
5650 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5651 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5652 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5653 {
5654 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5655 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5656 else
5657 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5658 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5659 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5660 }
5661 else
5662 {
5663 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5664 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5665 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5666 }
5667 }
5668 /* Otherwise, allocate a register that satisfies the caller and transfer
5669 the shadowing if compatible with the intended use. (This basically
5670 means the call wants a non-volatile register (RSP push/pop scenario).) */
5671 else
5672 {
5673 Assert(fNoVolatileRegs);
5674 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5675 !fNoVolatileRegs
5676 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5677 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5678 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5679 {
5680 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5681                Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5682 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5683 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5684 }
5685 else
5686 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5687 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5688 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5689 idxSimdReg = idxRegNew;
5690 }
5691 }
5692 else
5693 {
5694 /*
5695 * Oops. Shadowed guest register already allocated!
5696 *
5697 * Allocate a new register, copy the value and, if updating, the
5698 * guest shadow copy assignment to the new register.
5699 */
5700 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5701 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5702 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5703 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5704
5705 /** @todo share register for readonly access. */
5706 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5707 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5708
5709 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5710 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5711 else
5712 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5713
5714 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5715 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5716 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5717 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5718 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5719 else
5720 {
5721 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5722 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5723 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5724 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5725 }
5726 idxSimdReg = idxRegNew;
5727 }
5728 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5729
5730#ifdef VBOX_STRICT
5731 /* Strict builds: Check that the value is correct. */
5732 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5733 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5734#endif
5735
5736 return idxSimdReg;
5737 }
5738
5739 /*
5740     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5741 */
5742 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5743
5744 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5745 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5746 else
5747 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5748
5749 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5750 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5751
5752    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5753 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5754
5755 return idxRegNew;
5756}
5757
5758#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5759
5760
5761
5762/*********************************************************************************************************************************
5763* Code emitters for flushing pending guest register writes and sanity checks *
5764*********************************************************************************************************************************/
5765
5766#ifdef VBOX_STRICT
5767/**
5768 * Does internal register allocator sanity checks.
5769 */
5770DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5771{
5772 /*
5773 * Iterate host registers building a guest shadowing set.
5774 */
5775 uint64_t bmGstRegShadows = 0;
5776 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5777 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5778 while (bmHstRegsWithGstShadow)
5779 {
5780 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5781 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5782 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5783
5784 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5785 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5786 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5787 bmGstRegShadows |= fThisGstRegShadows;
5788 while (fThisGstRegShadows)
5789 {
5790 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5791 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5792 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5793 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5794 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5795 }
5796 }
5797 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5798 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5799 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5800
5801 /*
5802 * Now the other way around, checking the guest to host index array.
5803 */
5804 bmHstRegsWithGstShadow = 0;
5805 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5806 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5807 while (bmGstRegShadows)
5808 {
5809 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5810 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5811 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5812
5813 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5814 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5815 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5816 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5817 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5818 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5819 }
5820 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5821 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5822 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5823}
5824#endif /* VBOX_STRICT */
5825
5826
5827/**
5828 * Flushes any delayed guest register writes.
5829 *
5830 * This must be called prior to calling CImpl functions and any helpers that use
5831 * the guest state (like raising exceptions) and such.
5832 *
5833 * This optimization has not yet been implemented. The first target would be
5834 * RIP updates, since these are the most common ones.
5835 */
5836DECL_HIDDEN_THROW(uint32_t)
5837iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5838{
5839#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5840 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5841 off = iemNativeEmitPcWriteback(pReNative, off);
5842#else
5843 RT_NOREF(pReNative, fGstShwExcept);
5844#endif
5845
5846#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5847 /** @todo r=bird: There must be a quicker way to check if anything needs
5848     *        doing and then call the SIMD function to do the flushing. */
5849 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5850 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5851 {
5852 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5853 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5854
5855 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5856 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5857
5858 if ( fFlushShadows
5859 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5860 {
5861 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5862
5863 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5864 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5865 }
5866 }
5867#else
5868 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5869#endif
5870
5871 return off;
5872}
5873
5874
5875#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5876/**
5877 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5878 */
5879DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5880{
5881 Assert(pReNative->Core.offPc);
5882# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5883 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5884 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5885# endif
5886
5887# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5888 /* Allocate a temporary PC register. */
5889 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5890
5891 /* Perform the addition and store the result. */
5892 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5893 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5894
5895 /* Free but don't flush the PC register. */
5896 iemNativeRegFreeTmp(pReNative, idxPcReg);
5897# else
5898 /* Compare the shadow with the context value, they should match. */
5899 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5900 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5901# endif
5902
5903 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5904 pReNative->Core.offPc = 0;
5905 pReNative->Core.cInstrPcUpdateSkipped = 0;
5906
5907 return off;
5908}
5909#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5910
5911
5912/*********************************************************************************************************************************
5913* Code Emitters (larger snippets) *
5914*********************************************************************************************************************************/
5915
5916/**
5917 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5918 * extending to 64-bit width.
5919 *
5920 * @returns New code buffer offset on success, UINT32_MAX on failure.
5921  * @param   pReNative   The recompiler state.
5922 * @param off The current code buffer position.
5923 * @param idxHstReg The host register to load the guest register value into.
5924 * @param enmGstReg The guest register to load.
5925 *
5926 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5927 * that is something the caller needs to do if applicable.
5928 */
5929DECL_HIDDEN_THROW(uint32_t)
5930iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5931{
5932 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5933 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5934
5935 switch (g_aGstShadowInfo[enmGstReg].cb)
5936 {
5937 case sizeof(uint64_t):
5938 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5939 case sizeof(uint32_t):
5940 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5941 case sizeof(uint16_t):
5942 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5943#if 0 /* not present in the table. */
5944 case sizeof(uint8_t):
5945 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5946#endif
5947 default:
5948 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5949 }
5950}
5951
5952
5953#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5954/**
5955 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5956 *
5957 * @returns New code buffer offset on success, UINT32_MAX on failure.
5958 * @param pReNative The recompiler state.
5959 * @param off The current code buffer position.
5960 * @param idxHstSimdReg The host register to load the guest register value into.
5961 * @param enmGstSimdReg The guest register to load.
5962 * @param enmLoadSz The load size of the register.
5963 *
5964  * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5965 * that is something the caller needs to do if applicable.
5966 */
5967DECL_HIDDEN_THROW(uint32_t)
5968iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5969 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5970{
5971 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5972
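    /* Mark which parts (low and/or high 128 bits) of the host register will hold valid guest data. */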
5973 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5974 switch (enmLoadSz)
5975 {
5976 case kIemNativeGstSimdRegLdStSz_256:
5977 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5978 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5979 case kIemNativeGstSimdRegLdStSz_Low128:
5980 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5981 case kIemNativeGstSimdRegLdStSz_High128:
5982 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5983 default:
5984 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5985 }
5986}
5987#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5988
5989#ifdef VBOX_STRICT
5990
5991/**
5992 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5993 *
5994 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5995 * Trashes EFLAGS on AMD64.
5996 */
5997DECL_HIDDEN_THROW(uint32_t)
5998iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5999{
6000# ifdef RT_ARCH_AMD64
6001 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6002
6003 /* rol reg64, 32 */
6004 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6005 pbCodeBuf[off++] = 0xc1;
6006 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6007 pbCodeBuf[off++] = 32;
6008
6009 /* test reg32, ffffffffh */
6010 if (idxReg >= 8)
6011 pbCodeBuf[off++] = X86_OP_REX_B;
6012 pbCodeBuf[off++] = 0xf7;
6013 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6014 pbCodeBuf[off++] = 0xff;
6015 pbCodeBuf[off++] = 0xff;
6016 pbCodeBuf[off++] = 0xff;
6017 pbCodeBuf[off++] = 0xff;
6018
6019 /* je/jz +1 */
6020 pbCodeBuf[off++] = 0x74;
6021 pbCodeBuf[off++] = 0x01;
6022
6023 /* int3 */
6024 pbCodeBuf[off++] = 0xcc;
6025
6026 /* rol reg64, 32 */
6027 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6028 pbCodeBuf[off++] = 0xc1;
6029 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6030 pbCodeBuf[off++] = 32;
6031
6032# elif defined(RT_ARCH_ARM64)
6033 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6034 /* lsr tmp0, reg64, #32 */
6035 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6036 /* cbz tmp0, +1 */
6037 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6038 /* brk #0x1100 */
6039 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6040
6041# else
6042# error "Port me!"
6043# endif
6044 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6045 return off;
6046}
6047
6048
6049/**
6050 * Emitting code that checks that the content of register @a idxReg is the same
6051 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6052 * instruction if that's not the case.
6053 *
6054 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6055 * Trashes EFLAGS on AMD64.
6056 */
6057DECL_HIDDEN_THROW(uint32_t)
6058iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6059{
6060# ifdef RT_ARCH_AMD64
6061 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6062
6063 /* cmp reg, [mem] */
6064 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6065 {
6066 if (idxReg >= 8)
6067 pbCodeBuf[off++] = X86_OP_REX_R;
6068 pbCodeBuf[off++] = 0x38;
6069 }
6070 else
6071 {
6072 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6073 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6074 else
6075 {
6076 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6077 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6078 else
6079 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6080 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6081 if (idxReg >= 8)
6082 pbCodeBuf[off++] = X86_OP_REX_R;
6083 }
6084 pbCodeBuf[off++] = 0x39;
6085 }
6086 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6087
6088 /* je/jz +1 */
6089 pbCodeBuf[off++] = 0x74;
6090 pbCodeBuf[off++] = 0x01;
6091
6092 /* int3 */
6093 pbCodeBuf[off++] = 0xcc;
6094
6095 /* For values smaller than the register size, we must check that the rest
6096 of the register is all zeros. */
6097 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6098 {
6099 /* test reg64, imm32 */
6100 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6101 pbCodeBuf[off++] = 0xf7;
6102 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6103 pbCodeBuf[off++] = 0;
6104 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6105 pbCodeBuf[off++] = 0xff;
6106 pbCodeBuf[off++] = 0xff;
6107
6108 /* je/jz +1 */
6109 pbCodeBuf[off++] = 0x74;
6110 pbCodeBuf[off++] = 0x01;
6111
6112 /* int3 */
6113 pbCodeBuf[off++] = 0xcc;
6114 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6115 }
6116 else
6117 {
6118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6119 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6120 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6121 }
6122
6123# elif defined(RT_ARCH_ARM64)
6124 /* mov TMP0, [gstreg] */
6125 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6126
6127 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6128 /* sub tmp0, tmp0, idxReg */
6129 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6130 /* cbz tmp0, +1 */
6131 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6132 /* brk #0x1000+enmGstReg */
6133 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6134 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6135
6136# else
6137# error "Port me!"
6138# endif
6139 return off;
6140}
6141
6142
6143# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6144# ifdef RT_ARCH_AMD64
6145/**
6146  * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6147 */
6148DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6149{
6150 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6151 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6152 if (idxSimdReg >= 8)
6153 pbCodeBuf[off++] = X86_OP_REX_R;
6154 pbCodeBuf[off++] = 0x0f;
6155 pbCodeBuf[off++] = 0x38;
6156 pbCodeBuf[off++] = 0x29;
6157 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6158
6159 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6160 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6161 pbCodeBuf[off++] = X86_OP_REX_W
6162 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6163 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6164 pbCodeBuf[off++] = 0x0f;
6165 pbCodeBuf[off++] = 0x3a;
6166 pbCodeBuf[off++] = 0x16;
6167 pbCodeBuf[off++] = 0xeb;
6168 pbCodeBuf[off++] = 0x00;
6169
6170 /* cmp tmp0, 0xffffffffffffffff. */
6171 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6172 pbCodeBuf[off++] = 0x83;
6173 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6174 pbCodeBuf[off++] = 0xff;
6175
6176 /* je/jz +1 */
6177 pbCodeBuf[off++] = 0x74;
6178 pbCodeBuf[off++] = 0x01;
6179
6180 /* int3 */
6181 pbCodeBuf[off++] = 0xcc;
6182
6183 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6184 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6185 pbCodeBuf[off++] = X86_OP_REX_W
6186 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6187 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6188 pbCodeBuf[off++] = 0x0f;
6189 pbCodeBuf[off++] = 0x3a;
6190 pbCodeBuf[off++] = 0x16;
6191 pbCodeBuf[off++] = 0xeb;
6192 pbCodeBuf[off++] = 0x01;
6193
6194 /* cmp tmp0, 0xffffffffffffffff. */
6195 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6196 pbCodeBuf[off++] = 0x83;
6197 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6198 pbCodeBuf[off++] = 0xff;
6199
6200 /* je/jz +1 */
6201 pbCodeBuf[off++] = 0x74;
6202 pbCodeBuf[off++] = 0x01;
6203
6204 /* int3 */
6205 pbCodeBuf[off++] = 0xcc;
6206
6207 return off;
6208}
6209# endif
6210
6211
6212/**
6213 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6214 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6215 * instruction if that's not the case.
6216 *
6217 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6218 * Trashes EFLAGS on AMD64.
6219 */
6220DECL_HIDDEN_THROW(uint32_t)
6221iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6222 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6223{
6224     /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6225 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6226 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6227 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6228 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6229 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6230 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6231 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6232 return off;
6233
6234# ifdef RT_ARCH_AMD64
6235 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6236 {
6237 /* movdqa vectmp0, idxSimdReg */
6238 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6239
6240 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6241
6242 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6243 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6244 }
6245
6246 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6247 {
6248         /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6249 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6250
6251 /* vextracti128 vectmp0, idxSimdReg, 1 */
6252 pbCodeBuf[off++] = X86_OP_VEX3;
6253 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6254 | X86_OP_VEX3_BYTE1_X
6255 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6256 | 0x03; /* Opcode map */
6257 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6258 pbCodeBuf[off++] = 0x39;
6259 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6260 pbCodeBuf[off++] = 0x01;
6261
6262 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6263 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6264 }
6265# elif defined(RT_ARCH_ARM64)
6266 /* mov vectmp0, [gstreg] */
6267 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6268
6269 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6270 {
6271 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6272 /* eor vectmp0, vectmp0, idxSimdReg */
6273 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6274         /* cnt vectmp0, vectmp0, #0 */
6275 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6276 /* umov tmp0, vectmp0.D[0] */
6277 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6278 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6279 /* cbz tmp0, +1 */
6280 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6281 /* brk #0x1000+enmGstReg */
6282 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6283 }
6284
6285 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6286 {
6287 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6288 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6289 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6290         /* cnt vectmp0 + 1, vectmp0 + 1, #0 */
6291 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6292 /* umov tmp0, (vectmp0 + 1).D[0] */
6293 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6294 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6295 /* cbz tmp0, +1 */
6296 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6297 /* brk #0x1000+enmGstReg */
6298 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6299 }
6300
6301# else
6302# error "Port me!"
6303# endif
6304
6305 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6306 return off;
6307}
6308# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6309
6310
6311/**
6312 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6313 * important bits.
6314 *
6315 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6316 * Trashes EFLAGS on AMD64.
6317 */
6318DECL_HIDDEN_THROW(uint32_t)
6319iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6320{
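    /* Load IEMCPU::fExec, mask out everything but the key bits and compare with the expected (masked) value. */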
6321 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6322 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6323 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6324 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6325
6326#ifdef RT_ARCH_AMD64
6327 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6328
6329 /* je/jz +1 */
6330 pbCodeBuf[off++] = 0x74;
6331 pbCodeBuf[off++] = 0x01;
6332
6333 /* int3 */
6334 pbCodeBuf[off++] = 0xcc;
6335
6336# elif defined(RT_ARCH_ARM64)
6337 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6338
6339 /* b.eq +1 */
6340 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6341 /* brk #0x2000 */
6342 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6343
6344# else
6345# error "Port me!"
6346# endif
6347 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6348
6349 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6350 return off;
6351}
6352
6353#endif /* VBOX_STRICT */
6354
6355
6356#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6357/**
6358 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6359 */
6360DECL_HIDDEN_THROW(uint32_t)
6361iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6362{
6363 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6364
6365 fEflNeeded &= X86_EFL_STATUS_BITS;
6366 if (fEflNeeded)
6367 {
6368# ifdef RT_ARCH_AMD64
6369 /* test dword [pVCpu + offVCpu], imm32 */
6370 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6371 if (fEflNeeded <= 0xff)
6372 {
6373 pCodeBuf[off++] = 0xf6;
6374 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6375 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6376 }
6377 else
6378 {
6379 pCodeBuf[off++] = 0xf7;
6380 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6381 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6382 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6383 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6384 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6385 }
6386 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6387
6388# else
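    /* Load fSkippingEFlags and break if any of the needed status bits are marked as skipped. */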
6389 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6390 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6391 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6392# ifdef RT_ARCH_ARM64
6393 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6394 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6395# else
6396# error "Port me!"
6397# endif
6398 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6399# endif
6400 }
6401 return off;
6402}
6403#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6404
6405
6406/**
6407  * Emits code for checking the return code of a call and rcPassUp, returning
6408  * from the code if either is non-zero.
6409 */
6410DECL_HIDDEN_THROW(uint32_t)
6411iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6412{
6413#ifdef RT_ARCH_AMD64
6414 /*
6415 * AMD64: eax = call status code.
6416 */
6417
6418 /* edx = rcPassUp */
6419 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6420# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6421 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6422# endif
6423
6424 /* edx = eax | rcPassUp */
6425 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6426 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6427 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6428 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6429
6430 /* Jump to non-zero status return path. */
6431 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6432
6433 /* done. */
6434
6435#elif RT_ARCH_ARM64
6436 /*
6437 * ARM64: w0 = call status code.
6438 */
6439# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6440 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6441# endif
6442 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6443
6444 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6445
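    /* orr w4, w3, w0 ; combine the 32-bit call status code (w0) with rcPassUp (w3). */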
6446 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6447
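    /* Jump to the non-zero status return path if the combined value is not zero. */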
6448 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6449 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6450 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6451
6452#else
6453# error "port me"
6454#endif
6455 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6456 RT_NOREF_PV(idxInstr);
6457 return off;
6458}
6459
6460
6461/**
6462 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6463 * raising a \#GP(0) if it isn't.
6464 *
6465 * @returns New code buffer offset, UINT32_MAX on failure.
6466 * @param pReNative The native recompile state.
6467 * @param off The code buffer offset.
6468 * @param idxAddrReg The host register with the address to check.
6469 * @param idxInstr The current instruction.
6470 */
6471DECL_HIDDEN_THROW(uint32_t)
6472iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6473{
6474 /*
6475 * Make sure we don't have any outstanding guest register writes as we may
6476      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6477 */
6478 off = iemNativeRegFlushPendingWrites(pReNative, off);
6479
6480#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6481 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6482#else
6483 RT_NOREF(idxInstr);
6484#endif
6485
6486#ifdef RT_ARCH_AMD64
6487 /*
6488 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6489 * return raisexcpt();
6490      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6491 */
6492 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6493
6494 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6495 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6496 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6497 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6498 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6499
6500 iemNativeRegFreeTmp(pReNative, iTmpReg);
6501
6502#elif defined(RT_ARCH_ARM64)
6503 /*
6504 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6505 * return raisexcpt();
6506 * ----
6507 * mov x1, 0x800000000000
6508 * add x1, x0, x1
6509 * cmp xzr, x1, lsr 48
6510 * b.ne .Lraisexcpt
6511 */
6512 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6513
6514 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6515 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6516 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6517 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6518
6519 iemNativeRegFreeTmp(pReNative, iTmpReg);
6520
6521#else
6522# error "Port me"
6523#endif
6524 return off;
6525}
6526
6527
6528/**
6529  * Emits code to check that the content of @a idxAddrReg is within the limit
6530 * of CS, raising a \#GP(0) if it isn't.
6531 *
6532 * @returns New code buffer offset; throws VBox status code on error.
6533 * @param pReNative The native recompile state.
6534 * @param off The code buffer offset.
6535 * @param idxAddrReg The host register (32-bit) with the address to
6536 * check.
6537 * @param idxInstr The current instruction.
6538 */
6539DECL_HIDDEN_THROW(uint32_t)
6540iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6541 uint8_t idxAddrReg, uint8_t idxInstr)
6542{
6543 /*
6544 * Make sure we don't have any outstanding guest register writes as we may
6545      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6546 */
6547 off = iemNativeRegFlushPendingWrites(pReNative, off);
6548
6549#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6550 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6551#else
6552 RT_NOREF(idxInstr);
6553#endif
6554
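    /* Fetch the CS limit and raise #GP(0) if the address is above it. */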
6555 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6556 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6557 kIemNativeGstRegUse_ReadOnly);
6558
6559 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6560 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6561
6562 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6563 return off;
6564}
6565
6566
6567/**
6568 * Emits a call to a CImpl function or something similar.
6569 */
6570DECL_HIDDEN_THROW(uint32_t)
6571iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6572 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6573{
6574 /* Writeback everything. */
6575 off = iemNativeRegFlushPendingWrites(pReNative, off);
6576
6577 /*
6578      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6579 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6580 */
6581 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6582 fGstShwFlush
6583 | RT_BIT_64(kIemNativeGstReg_Pc)
6584 | RT_BIT_64(kIemNativeGstReg_EFlags));
6585 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6586
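    /* Free up the call argument registers (moving/spilling whatever occupies them) before loading the parameters. */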
6587 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6588
6589 /*
6590 * Load the parameters.
6591 */
6592#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6593     /* Special handling: the hidden VBOXSTRICTRC return pointer occupies the first argument register. */
6594 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6595 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6596 if (cAddParams > 0)
6597 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6598 if (cAddParams > 1)
6599 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6600 if (cAddParams > 2)
6601 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6602 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6603
6604#else
6605 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6606 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6607 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6608 if (cAddParams > 0)
6609 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6610 if (cAddParams > 1)
6611 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6612 if (cAddParams > 2)
6613# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6614 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6615# else
6616 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6617# endif
6618#endif
6619
6620 /*
6621 * Make the call.
6622 */
6623 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6624
6625#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6626 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6627#endif
6628
6629 /*
6630 * Check the status code.
6631 */
6632 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6633}
6634
6635
6636/**
6637 * Emits a call to a threaded worker function.
6638 */
6639DECL_HIDDEN_THROW(uint32_t)
6640iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6641{
6642 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6643
6644 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6645 off = iemNativeRegFlushPendingWrites(pReNative, off);
6646
6647 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6648 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6649
6650#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6651 /* The threaded function may throw / long jmp, so set current instruction
6652 number if we're counting. */
6653 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6654#endif
6655
6656 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6657
6658#ifdef RT_ARCH_AMD64
6659 /* Load the parameters and emit the call. */
6660# ifdef RT_OS_WINDOWS
6661# ifndef VBOXSTRICTRC_STRICT_ENABLED
6662 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6663 if (cParams > 0)
6664 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6665 if (cParams > 1)
6666 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6667 if (cParams > 2)
6668 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6669# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6670 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6671 if (cParams > 0)
6672 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6673 if (cParams > 1)
6674 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6675 if (cParams > 2)
6676 {
6677 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6678 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6679 }
6680 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6681# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6682# else
6683 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6684 if (cParams > 0)
6685 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6686 if (cParams > 1)
6687 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6688 if (cParams > 2)
6689 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6690# endif
6691
6692 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6693
6694# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6695 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6696# endif
6697
6698#elif RT_ARCH_ARM64
6699 /*
6700 * ARM64:
6701 */
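    /* Load the parameters and emit the call. */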
6702 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6703 if (cParams > 0)
6704 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6705 if (cParams > 1)
6706 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6707 if (cParams > 2)
6708 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6709
6710 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6711
6712#else
6713# error "port me"
6714#endif
6715
6716 /*
6717 * Check the status code.
6718 */
6719 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6720
6721 return off;
6722}
6723
6724#ifdef VBOX_WITH_STATISTICS
6725/**
6726 * Emits code to update the thread call statistics.
6727 */
6728DECL_INLINE_THROW(uint32_t)
6729iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6730{
6731 /*
6732 * Update threaded function stats.
6733 */
6734 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6735 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6736# if defined(RT_ARCH_ARM64)
6737 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6738 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6739 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6740 iemNativeRegFreeTmp(pReNative, idxTmp1);
6741 iemNativeRegFreeTmp(pReNative, idxTmp2);
6742# else
6743 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6744# endif
6745 return off;
6746}
6747#endif /* VBOX_WITH_STATISTICS */
6748
6749
6750/**
6751 * Emits the code at the ReturnWithFlags label (returns
6752 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6753 */
6754static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6755{
6756 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6757 if (idxLabel != UINT32_MAX)
6758 {
6759 iemNativeLabelDefine(pReNative, idxLabel, off);
6760
6761 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6762
6763 /* jump back to the return sequence. */
6764 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6765 }
6766 return off;
6767}
6768
6769
6770/**
6771 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6772 */
6773static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6774{
6775 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6776 if (idxLabel != UINT32_MAX)
6777 {
6778 iemNativeLabelDefine(pReNative, idxLabel, off);
6779
6780 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6781
6782 /* jump back to the return sequence. */
6783 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6784 }
6785 return off;
6786}
6787
6788
6789/**
6790 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6791 */
6792static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6793{
6794 /*
6795 * Generate the rc + rcPassUp fiddling code if needed.
6796 */
6797 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6798 if (idxLabel != UINT32_MAX)
6799 {
6800 iemNativeLabelDefine(pReNative, idxLabel, off);
6801
6802 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6803#ifdef RT_ARCH_AMD64
6804# ifdef RT_OS_WINDOWS
6805# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6806 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6807# endif
6808 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6809 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6810# else
6811 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6812 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6813# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6814 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6815# endif
6816# endif
6817# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6818 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6819# endif
6820
6821#else
6822 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6823 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6824 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6825#endif
6826
6827 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6828 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6829 }
6830 return off;
6831}
6832
6833
6834/**
6835 * Emits a standard epilog.
6836 */
6837static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6838{
6839 *pidxReturnLabel = UINT32_MAX;
6840
6841 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6842 off = iemNativeRegFlushPendingWrites(pReNative, off);
6843
6844 /*
6845 * Successful return, so clear the return register (eax, w0).
6846 */
6847     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6848
6849 /*
6850 * Define label for common return point.
6851 */
6852 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6853 *pidxReturnLabel = idxReturn;
6854
6855 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6856
6857 /*
6858 * Restore registers and return.
6859 */
6860#ifdef RT_ARCH_AMD64
6861 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6862
6863 /* Reposition esp at the r15 restore point. */
6864 pbCodeBuf[off++] = X86_OP_REX_W;
6865 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6866 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6867 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6868
6869 /* Pop non-volatile registers and return */
6870 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6871 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6872 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6873 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6874 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6875 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6876 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6877 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6878# ifdef RT_OS_WINDOWS
6879 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6880 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6881# endif
6882 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6883 pbCodeBuf[off++] = 0xc9; /* leave */
6884 pbCodeBuf[off++] = 0xc3; /* ret */
6885 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6886
6887#elif RT_ARCH_ARM64
6888 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6889
6890     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6891 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6892 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6893 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6894 IEMNATIVE_FRAME_VAR_SIZE / 8);
6895 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6896 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6897 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6898 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6899 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6900 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6901 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6902 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6903 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6904 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6905 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6906 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6907
6908 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6909 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6910 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6911 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6912
6913 /* retab / ret */
6914# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6915 if (1)
6916 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6917 else
6918# endif
6919 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6920
6921#else
6922# error "port me"
6923#endif
6924 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6925
6926 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6927}
6928
6929
6930/**
6931 * Emits a standard prolog.
6932 */
6933static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6934{
6935#ifdef RT_ARCH_AMD64
6936 /*
6937 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6938 * reserving 64 bytes for stack variables plus 4 non-register argument
6939      * slots.  Fixed register assignment: xBX = pVCpu.
6940 *
6941 * Since we always do the same register spilling, we can use the same
6942 * unwind description for all the code.
6943 */
6944 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6945 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6946 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6947 pbCodeBuf[off++] = 0x8b;
6948 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6949 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6950 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6951# ifdef RT_OS_WINDOWS
6952 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6953 pbCodeBuf[off++] = 0x8b;
6954 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6955 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6956 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6957# else
6958 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6959 pbCodeBuf[off++] = 0x8b;
6960 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6961# endif
6962 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6963 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6964 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6965 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6966 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6967 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6968 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6969 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6970
6971# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6972 /* Save the frame pointer. */
6973 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6974# endif
6975
6976 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6977 X86_GREG_xSP,
6978 IEMNATIVE_FRAME_ALIGN_SIZE
6979 + IEMNATIVE_FRAME_VAR_SIZE
6980 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6981 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6982 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6983 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6984 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6985
6986#elif RT_ARCH_ARM64
6987 /*
6988 * We set up a stack frame exactly like on x86, only we have to push the
6989      * return address ourselves here.  We save all non-volatile registers.
6990 */
6991 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6992
6993 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further; we've been unable
6994   *                    to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6995   *                    definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
6996   *                    in any way conditional, so just emitting this instruction now and hoping for the best... */
6997 /* pacibsp */
6998 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6999# endif
7000
7001 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7002 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7003 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7004 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7005 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7006 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7007 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7008 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7009 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7010 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7011 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7012 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7013 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7014 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7015 /* Save the BP and LR (ret address) registers at the top of the frame. */
7016 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7017 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7018 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7019 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7020 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7021 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
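    /* Resulting save area layout (IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12 slots, per the stores above):
           sp+0:  x19,x20    sp+16: x21,x22    sp+32: x23,x24
           sp+48: x25,x26    sp+64: x27,x28    sp+80: bp,lr    =>  bp = sp + 80 (the old bp slot). */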
7022
7023 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7024 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7025
7026 /* mov r28, r0 */
7027 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7028 /* mov r27, r1 */
7029 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7030
7031# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7032 /* Save the frame pointer. */
7033 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7034 ARMV8_A64_REG_X2);
7035# endif
7036
7037#else
7038# error "port me"
7039#endif
7040 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7041 return off;
7042}
7043
7044
7045/*********************************************************************************************************************************
7046* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7047*********************************************************************************************************************************/
7048
7049/**
7050 * Internal work that allocates a variable with kind set to
7051 * kIemNativeVarKind_Invalid and no current stack allocation.
7052 *
7053 * The kind will either be set by the caller or later when the variable is first
7054 * assigned a value.
7055 *
7056 * @returns Unpacked index.
7057 * @internal
7058 */
7059static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7060{
7061 Assert(cbType > 0 && cbType <= 64);
7062 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7063 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7064 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7065 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7066 pReNative->Core.aVars[idxVar].cbVar = cbType;
7067 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7068 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7069 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7070 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7071 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7072 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7073 pReNative->Core.aVars[idxVar].u.uValue = 0;
7074#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7075 pReNative->Core.aVars[idxVar].fSimdReg = false;
7076#endif
7077 return idxVar;
7078}
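/* Note: the index returned above is unpacked; the public wrappers further down
   (iemNativeVarAlloc, iemNativeArgAlloc, iemNativeVarAllocConst, ...) convert it
   with IEMNATIVE_VAR_IDX_PACK before handing it out. */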
7079
7080
7081/**
7082 * Internal work that allocates an argument variable w/o setting enmKind.
7083 *
7084 * @returns Unpacked index.
7085 * @internal
7086 */
7087static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7088{
7089 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7090 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7091 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7092
7093 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7094 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7095 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7096 return idxVar;
7097}
7098
7099
7100/**
7101 * Gets the stack slot for a stack variable, allocating one if necessary.
7102 *
7103 * Calling this function implies that the stack slot will contain a valid
7104 * variable value. The caller deals with any register currently assigned to the
7105 * variable, typically by spilling it into the stack slot.
7106 *
7107 * @returns The stack slot number.
7108 * @param pReNative The recompiler state.
7109 * @param idxVar The variable.
7110 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7111 */
7112DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7113{
7114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7115 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7116 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7117
7118 /* Already got a slot? */
7119 uint8_t const idxStackSlot = pVar->idxStackSlot;
7120 if (idxStackSlot != UINT8_MAX)
7121 {
7122 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7123 return idxStackSlot;
7124 }
7125
7126 /*
7127 * A single slot is easy to allocate.
7128 * Allocate them from the top end, closest to BP, to reduce the displacement.
7129 */
7130 if (pVar->cbVar <= sizeof(uint64_t))
7131 {
7132 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7133 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7134 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7135 pVar->idxStackSlot = (uint8_t)iSlot;
7136 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7137 return (uint8_t)iSlot;
7138 }
7139
7140 /*
7141 * We need more than one stack slot.
7142 *
7143 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7144 */
7145 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7146 Assert(pVar->cbVar <= 64);
7147 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7148 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
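    /* E.g. cbVar=32: fBitAlignMask=3 (4-slot alignment) and fBitAllocMask=0xf (four 8-byte slots). */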
7149 uint32_t bmStack = pReNative->Core.bmStack;
7150 while (bmStack != UINT32_MAX)
7151 {
7152 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7153 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7154 iSlot = (iSlot - 1) & ~fBitAlignMask;
7155 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7156 {
7157 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7158 pVar->idxStackSlot = (uint8_t)iSlot;
7159 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7160 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7161 return (uint8_t)iSlot;
7162 }
7163
7164 bmStack |= (fBitAllocMask << iSlot);
7165 }
7166 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7167}
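/* Typical caller pattern, a sketch based on the spill code further down in this file:
       uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
       off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
   i.e. the caller does the actual spilling of whatever register the variable currently occupies. */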
7168
7169
7170/**
7171 * Changes the variable to a stack variable.
7172 *
7173 * Currently this is only possible to do the first time the variable is used;
7174 * switching later could be implemented but hasn't been done.
7175 *
7176 * @param pReNative The recompiler state.
7177 * @param idxVar The variable.
7178 * @throws VERR_IEM_VAR_IPE_2
7179 */
7180DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7181{
7182 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7183 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7184 if (pVar->enmKind != kIemNativeVarKind_Stack)
7185 {
7186 /* We could in theory transition from immediate to stack as well, but it
7187 would involve the caller doing work storing the value on the stack. So,
7188 till that's required we only allow transition from invalid. */
7189 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7190 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7191 pVar->enmKind = kIemNativeVarKind_Stack;
7192
7193 /* Note! We don't allocate a stack slot here, that's only done when a
7194 slot is actually needed to hold a variable value. */
7195 }
7196}
7197
7198
7199/**
7200 * Sets the variable to a constant (immediate) value.
7201 *
7202 * This does not require stack storage as we know the value and can always
7203 * reload it, unless of course it's referenced.
7204 *
7205 * @param pReNative The recompiler state.
7206 * @param idxVar The variable.
7207 * @param uValue The immediate value.
7208 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7209 */
7210DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7211{
7212 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7213 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7214 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7215 {
7216 /* Only simple transitions for now. */
7217 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7218 pVar->enmKind = kIemNativeVarKind_Immediate;
7219 }
7220 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7221
7222 pVar->u.uValue = uValue;
7223 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7224 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7225 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
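    /* E.g. a 2-byte variable must have uValue <= 0xffff at this point; anything larger indicates
       a missing truncation in the caller (see iemNativeArgAllocConst/iemNativeVarAllocConst below). */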
7226}
7227
7228
7229/**
7230 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7231 *
7232 * This does not require stack storage as we know the value and can always
7233 * reload it. Loading is postponed till needed.
7234 *
7235 * @param pReNative The recompiler state.
7236 * @param idxVar The variable. Unpacked.
7237 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7238 *
7239 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7240 * @internal
7241 */
7242static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7243{
7244 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7245 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7246
7247 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7248 {
7249 /* Only simple transitions for now. */
7250 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7251 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7252 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7253 }
7254 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7255
7256 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7257
7258 /* Update the other variable, ensure it's a stack variable. */
7259 /** @todo handle variables with const values... that'll go boom now. */
7260 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7261 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7262}
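/* Note: the idxReferrerVar back-link set above is what makes iemNativeEmitCallCommon (below)
   spill the referenced variable out of any host register before a helper call, so the stack
   address taken for the reference stays valid across the call. */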
7263
7264
7265/**
7266 * Sets the variable to a reference (pointer) to a guest register reference.
7267 *
7268 * This does not require stack storage as we know the value and can always
7269 * reload it. Loading is postponed till needed.
7270 *
7271 * @param pReNative The recompiler state.
7272 * @param idxVar The variable.
7273 * @param   enmRegClass     The class of guest registers to reference.
7274 * @param idxReg The register within @a enmRegClass to reference.
7275 *
7276 * @throws VERR_IEM_VAR_IPE_2
7277 */
7278DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7279 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7280{
7281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7282 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7283
7284 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7285 {
7286 /* Only simple transitions for now. */
7287 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7288 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7289 }
7290 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7291
7292 pVar->u.GstRegRef.enmClass = enmRegClass;
7293 pVar->u.GstRegRef.idx = idxReg;
7294}
7295
7296
7297DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7298{
7299 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7300}
7301
7302
7303DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7304{
7305 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7306
7307    /* Since we're using a generic uint64_t value type, we must truncate it if
7308       the variable is smaller, otherwise we may end up with too large a value when
7309       scaling up an imm8 w/ sign-extension.
7310
7311       This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7312       in the BIOS, bx=1) when running on ARM, because clang expects 16-bit
7313       register parameters to have bits 16 and up set to zero. Instead of
7314       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7315       CF value in the result. */
7316 switch (cbType)
7317 {
7318 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7319 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7320 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7321 }
7322 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7323 return idxVar;
7324}
7325
7326
7327DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7328{
7329 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7330 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7331 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7332 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7333 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7334 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7335
7336 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7337 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7338 return idxArgVar;
7339}
7340
7341
7342DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7343{
7344 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7345 /* Don't set to stack now, leave that to the first use as for instance
7346 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7347 return idxVar;
7348}
7349
7350
7351DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7352{
7353 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7354
7355    /* Since we're using a generic uint64_t value type, we must truncate it if
7356       the variable is smaller, otherwise we may end up with too large a value when
7357       scaling up an imm8 w/ sign-extension. */
7358 switch (cbType)
7359 {
7360 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7361 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7362 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7363 }
7364 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7365 return idxVar;
7366}
7367
7368
7369/**
7370 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7371 * fixed till we call iemNativeVarRegisterRelease.
7372 *
7373 * @returns The host register number.
7374 * @param pReNative The recompiler state.
7375 * @param idxVar The variable.
7376 * @param poff Pointer to the instruction buffer offset.
7377 * In case a register needs to be freed up or the value
7378 * loaded off the stack.
7379 * @param fInitialized Set if the variable must already have been initialized.
7380 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7381 * the case.
7382 * @param idxRegPref Preferred register number or UINT8_MAX.
7383 */
7384DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7385 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7386{
7387 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7388 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7389 Assert(pVar->cbVar <= 8);
7390 Assert(!pVar->fRegAcquired);
7391
7392 uint8_t idxReg = pVar->idxReg;
7393 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7394 {
7395 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7396 && pVar->enmKind < kIemNativeVarKind_End);
7397 pVar->fRegAcquired = true;
7398 return idxReg;
7399 }
7400
7401 /*
7402 * If the kind of variable has not yet been set, default to 'stack'.
7403 */
7404 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7405 && pVar->enmKind < kIemNativeVarKind_End);
7406 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7407 iemNativeVarSetKindToStack(pReNative, idxVar);
7408
7409 /*
7410     * We have to allocate a register for the variable, even if it's a stack one,
7411     * as we don't know if there are modifications being made to it before it's
7412     * finalized (todo: analyze and insert hints about that?).
7413     *
7414     * If we can, we try to get the correct register for argument variables. This
7415     * assumes that most argument variables are fetched as close as possible
7416     * to the actual call, so that there aren't any interfering hidden calls
7417     * (memory accesses, etc.) in between.
7418     *
7419     * If we cannot, or it's a local variable, we make sure no argument registers
7420     * that will be used by this MC block are allocated here, and we always
7421     * prefer non-volatile registers to avoid needing to spill stuff for internal
7422     * calls.
7423 */
7424 /** @todo Detect too early argument value fetches and warn about hidden
7425 * calls causing less optimal code to be generated in the python script. */
7426
7427 uint8_t const uArgNo = pVar->uArgNo;
7428 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7429 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7430 {
7431 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7432 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7433 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7434 }
7435 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7436 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7437 {
7438 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7439 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7440 & ~pReNative->Core.bmHstRegsWithGstShadow
7441 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7442 & fNotArgsMask;
7443 if (fRegs)
7444 {
7445            /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
7446 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7447 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7448 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7449 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7450 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7451 }
7452 else
7453 {
7454 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7455 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7456 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7457 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7458 }
7459 }
7460 else
7461 {
7462 idxReg = idxRegPref;
7463 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7464 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7465 }
7466 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7467 pVar->idxReg = idxReg;
7468
7469#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7470 pVar->fSimdReg = false;
7471#endif
7472
7473 /*
7474 * Load it off the stack if we've got a stack slot.
7475 */
7476 uint8_t const idxStackSlot = pVar->idxStackSlot;
7477 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7478 {
7479 Assert(fInitialized);
7480 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7481 switch (pVar->cbVar)
7482 {
7483 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7484 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7485 case 3: AssertFailed(); RT_FALL_THRU();
7486 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7487 default: AssertFailed(); RT_FALL_THRU();
7488 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7489 }
7490 }
7491 else
7492 {
7493 Assert(idxStackSlot == UINT8_MAX);
7494 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7495 }
7496 pVar->fRegAcquired = true;
7497 return idxReg;
7498}
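/* Usage sketch (illustrative only; the exact release signature is assumed from the doc comment above):
       uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
       ... emit instructions using idxVarReg ...
       iemNativeVarRegisterRelease(pReNative, idxVar);
 */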
7499
7500
7501#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7502/**
7503 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7504 * fixed till we call iemNativeVarRegisterRelease.
7505 *
7506 * @returns The host register number.
7507 * @param pReNative The recompiler state.
7508 * @param idxVar The variable.
7509 * @param poff Pointer to the instruction buffer offset.
7510 * In case a register needs to be freed up or the value
7511 * loaded off the stack.
7512 * @param fInitialized Set if the variable must already have been initialized.
7513 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7514 * the case.
7515 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7516 */
7517DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7518 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7519{
7520 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7521 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7522 Assert( pVar->cbVar == sizeof(RTUINT128U)
7523 || pVar->cbVar == sizeof(RTUINT256U));
7524 Assert(!pVar->fRegAcquired);
7525
7526 uint8_t idxReg = pVar->idxReg;
7527 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7528 {
7529 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7530 && pVar->enmKind < kIemNativeVarKind_End);
7531 pVar->fRegAcquired = true;
7532 return idxReg;
7533 }
7534
7535 /*
7536 * If the kind of variable has not yet been set, default to 'stack'.
7537 */
7538 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7539 && pVar->enmKind < kIemNativeVarKind_End);
7540 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7541 iemNativeVarSetKindToStack(pReNative, idxVar);
7542
7543 /*
7544     * We have to allocate a register for the variable, even if it's a stack one,
7545     * as we don't know if there are modifications being made to it before it's
7546     * finalized (todo: analyze and insert hints about that?).
7547     *
7548     * If we can, we try to get the correct register for argument variables. This
7549     * assumes that most argument variables are fetched as close as possible
7550     * to the actual call, so that there aren't any interfering hidden calls
7551     * (memory accesses, etc.) in between.
7552     *
7553     * If we cannot, or it's a local variable, we make sure no argument registers
7554     * that will be used by this MC block are allocated here, and we always
7555     * prefer non-volatile registers to avoid needing to spill stuff for internal
7556     * calls.
7557 */
7558 /** @todo Detect too early argument value fetches and warn about hidden
7559 * calls causing less optimal code to be generated in the python script. */
7560
7561 uint8_t const uArgNo = pVar->uArgNo;
7562 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7563
7564    /* SIMD is a bit simpler for now because there is no support for arguments. */
7565 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7566 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7567 {
7568 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7569 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7570 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7571 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7572 & fNotArgsMask;
7573 if (fRegs)
7574 {
7575 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7576 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7577 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7578 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7579 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7580 }
7581 else
7582 {
7583 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7584 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7585 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7586 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7587 }
7588 }
7589 else
7590 {
7591 idxReg = idxRegPref;
7592 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7593 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7594 }
7595 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7596
7597 pVar->fSimdReg = true;
7598 pVar->idxReg = idxReg;
7599
7600 /*
7601 * Load it off the stack if we've got a stack slot.
7602 */
7603 uint8_t const idxStackSlot = pVar->idxStackSlot;
7604 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7605 {
7606 Assert(fInitialized);
7607 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7608 switch (pVar->cbVar)
7609 {
7610 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7611 default: AssertFailed(); RT_FALL_THRU();
7612 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7613 }
7614 }
7615 else
7616 {
7617 Assert(idxStackSlot == UINT8_MAX);
7618 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7619 }
7620 pVar->fRegAcquired = true;
7621 return idxReg;
7622}
7623#endif
7624
7625
7626/**
7627 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7628 * guest register.
7629 *
7630 * This function makes sure there is a register for it and sets it to be the
7631 * current shadow copy of @a enmGstReg.
7632 *
7633 * @returns The host register number.
7634 * @param pReNative The recompiler state.
7635 * @param idxVar The variable.
7636 * @param enmGstReg The guest register this variable will be written to
7637 * after this call.
7638 * @param poff Pointer to the instruction buffer offset.
7639 * In case a register needs to be freed up or if the
7640 * variable content needs to be loaded off the stack.
7641 *
7642 * @note    We DO NOT expect @a idxVar to be an argument variable,
7643 *          because this function is only used in the commit stage of an
7644 *          instruction.
7645 */
7646DECL_HIDDEN_THROW(uint8_t)
7647iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7648{
7649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7650 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7651 Assert(!pVar->fRegAcquired);
7652 AssertMsgStmt( pVar->cbVar <= 8
7653 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7654 || pVar->enmKind == kIemNativeVarKind_Stack),
7655 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7656 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7657 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7658
7659 /*
7660 * This shouldn't ever be used for arguments, unless it's in a weird else
7661 * branch that doesn't do any calling and even then it's questionable.
7662 *
7663 * However, in case someone writes crazy wrong MC code and does register
7664 * updates before making calls, just use the regular register allocator to
7665 * ensure we get a register suitable for the intended argument number.
7666 */
7667 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7668
7669 /*
7670 * If there is already a register for the variable, we transfer/set the
7671 * guest shadow copy assignment to it.
7672 */
7673 uint8_t idxReg = pVar->idxReg;
7674 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7675 {
7676 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7677 {
7678 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7679 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7680 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7681 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7682 }
7683 else
7684 {
7685 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7686 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7687 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7688 }
7689 /** @todo figure this one out. We need some way of making sure the register isn't
7690 * modified after this point, just in case we start writing crappy MC code. */
7691 pVar->enmGstReg = enmGstReg;
7692 pVar->fRegAcquired = true;
7693 return idxReg;
7694 }
7695 Assert(pVar->uArgNo == UINT8_MAX);
7696
7697 /*
7698     * Because this is supposed to be the commit stage, we just tag along with the
7699     * temporary register allocator and upgrade the allocation to a variable register.
7700 */
7701 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7702 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7703 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7704 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7705 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7706 pVar->idxReg = idxReg;
7707
7708 /*
7709 * Now we need to load the register value.
7710 */
7711 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7712 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7713 else
7714 {
7715 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7716 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7717 switch (pVar->cbVar)
7718 {
7719 case sizeof(uint64_t):
7720 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7721 break;
7722 case sizeof(uint32_t):
7723 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7724 break;
7725 case sizeof(uint16_t):
7726 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7727 break;
7728 case sizeof(uint8_t):
7729 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7730 break;
7731 default:
7732 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7733 }
7734 }
7735
7736 pVar->fRegAcquired = true;
7737 return idxReg;
7738}
7739
7740
7741/**
7742 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7743 *
7744 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7745 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7746 * requirement of flushing anything in volatile host registers when making a
7747 * call.
7748 *
7749 * @returns New @a off value.
7750 * @param pReNative The recompiler state.
7751 * @param off The code buffer position.
7752 * @param fHstRegsNotToSave Set of registers not to save & restore.
7753 */
7754DECL_HIDDEN_THROW(uint32_t)
7755iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7756{
7757 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7758 if (fHstRegs)
7759 {
7760 do
7761 {
7762 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7763 fHstRegs &= ~RT_BIT_32(idxHstReg);
7764
7765 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7766 {
7767 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7768 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7769 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7770 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7771 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7772 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7773 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7774 {
7775 case kIemNativeVarKind_Stack:
7776 {
7777 /* Temporarily spill the variable register. */
7778 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7779 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7780 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7781 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7782 continue;
7783 }
7784
7785 case kIemNativeVarKind_Immediate:
7786 case kIemNativeVarKind_VarRef:
7787 case kIemNativeVarKind_GstRegRef:
7788 /* It is weird to have any of these loaded at this point. */
7789 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7790 continue;
7791
7792 case kIemNativeVarKind_End:
7793 case kIemNativeVarKind_Invalid:
7794 break;
7795 }
7796 AssertFailed();
7797 }
7798 else
7799 {
7800 /*
7801 * Allocate a temporary stack slot and spill the register to it.
7802 */
7803 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7804 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7805 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7806 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7807 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7808 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7809 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7810 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7811 }
7812 } while (fHstRegs);
7813 }
7814#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7815 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7816 if (fHstRegs)
7817 {
7818 do
7819 {
7820 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7821 fHstRegs &= ~RT_BIT_32(idxHstReg);
7822
7823 /*
7824             * Guest registers are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7825             * which would be more difficult anyway due to spanning multiple stack slots and different sizes
7826             * (besides, we only have a limited number of slots at the moment). Fixed temporary registers
7827             * don't need saving.
7828 */
7829 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7830 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7831 continue;
7832
7833 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7834
7835 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7836 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7837 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7838 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7839 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7840 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7841 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7842 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7843 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7844 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7845 {
7846 case kIemNativeVarKind_Stack:
7847 {
7848 /* Temporarily spill the variable register. */
7849 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7850 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7851 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7852 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7853 if (cbVar == sizeof(RTUINT128U))
7854 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7855 else
7856 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7857 continue;
7858 }
7859
7860 case kIemNativeVarKind_Immediate:
7861 case kIemNativeVarKind_VarRef:
7862 case kIemNativeVarKind_GstRegRef:
7863 /* It is weird to have any of these loaded at this point. */
7864 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7865 continue;
7866
7867 case kIemNativeVarKind_End:
7868 case kIemNativeVarKind_Invalid:
7869 break;
7870 }
7871 AssertFailed();
7872 } while (fHstRegs);
7873 }
7874#endif
7875 return off;
7876}
7877
7878
7879/**
7880 * Emit code to restore volatile registers after a call to a helper.
7881 *
7882 * @returns New @a off value.
7883 * @param pReNative The recompiler state.
7884 * @param off The code buffer position.
7885 * @param fHstRegsNotToSave Set of registers not to save & restore.
7886 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7887 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7888 */
7889DECL_HIDDEN_THROW(uint32_t)
7890iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7891{
7892 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7893 if (fHstRegs)
7894 {
7895 do
7896 {
7897 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7898 fHstRegs &= ~RT_BIT_32(idxHstReg);
7899
7900 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7901 {
7902 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7903 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7904 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7905 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7906 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7907 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7908 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7909 {
7910 case kIemNativeVarKind_Stack:
7911 {
7912 /* Unspill the variable register. */
7913 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7914 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7915 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7916 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7917 continue;
7918 }
7919
7920 case kIemNativeVarKind_Immediate:
7921 case kIemNativeVarKind_VarRef:
7922 case kIemNativeVarKind_GstRegRef:
7923 /* It is weird to have any of these loaded at this point. */
7924 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7925 continue;
7926
7927 case kIemNativeVarKind_End:
7928 case kIemNativeVarKind_Invalid:
7929 break;
7930 }
7931 AssertFailed();
7932 }
7933 else
7934 {
7935 /*
7936 * Restore from temporary stack slot.
7937 */
7938 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7939 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7940 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7941 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7942
7943 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7944 }
7945 } while (fHstRegs);
7946 }
7947#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7948 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7949 if (fHstRegs)
7950 {
7951 do
7952 {
7953 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7954 fHstRegs &= ~RT_BIT_32(idxHstReg);
7955
7956 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7957 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7958 continue;
7959 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7960
7961 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7962 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7963 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7964 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7965 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7966 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7967 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7968 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7969 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7970 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7971 {
7972 case kIemNativeVarKind_Stack:
7973 {
7974 /* Unspill the variable register. */
7975 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7976 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7977 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7978 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7979
7980 if (cbVar == sizeof(RTUINT128U))
7981 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7982 else
7983 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7984 continue;
7985 }
7986
7987 case kIemNativeVarKind_Immediate:
7988 case kIemNativeVarKind_VarRef:
7989 case kIemNativeVarKind_GstRegRef:
7990 /* It is weird to have any of these loaded at this point. */
7991 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7992 continue;
7993
7994 case kIemNativeVarKind_End:
7995 case kIemNativeVarKind_Invalid:
7996 break;
7997 }
7998 AssertFailed();
7999 } while (fHstRegs);
8000 }
8001#endif
8002 return off;
8003}
8004
8005
8006/**
8007 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8008 *
8009 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8010 *
8011 * ASSUMES that @a idxVar is valid and unpacked.
8012 */
8013DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8014{
8015 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8016 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8017 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8018 {
8019 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8020 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8021 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
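        /* E.g. cbVar=32 gives cSlots=4 and fAllocMask=0xf, mirroring the mask used when the slots were allocated. */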
8022 Assert(cSlots > 0);
8023 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8024 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8025 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8026 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8027 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8028 }
8029 else
8030 Assert(idxStackSlot == UINT8_MAX);
8031}
8032
8033
8034/**
8035 * Worker that frees a single variable.
8036 *
8037 * ASSUMES that @a idxVar is valid and unpacked.
8038 */
8039DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8040{
8041 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8042 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8043 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8044
8045 /* Free the host register first if any assigned. */
8046 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8047#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8048 if ( idxHstReg != UINT8_MAX
8049 && pReNative->Core.aVars[idxVar].fSimdReg)
8050 {
8051 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8052 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8053 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8054 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8055 }
8056 else
8057#endif
8058 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8059 {
8060 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8061 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8062 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8063 }
8064
8065 /* Free argument mapping. */
8066 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8067 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8068 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8069
8070 /* Free the stack slots. */
8071 iemNativeVarFreeStackSlots(pReNative, idxVar);
8072
8073 /* Free the actual variable. */
8074 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8075 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8076}
8077
8078
8079/**
8080 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8081 */
8082DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8083{
8084 while (bmVars != 0)
8085 {
8086 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8087 bmVars &= ~RT_BIT_32(idxVar);
8088
8089#if 1 /** @todo optimize by simplifying this later... */
8090 iemNativeVarFreeOneWorker(pReNative, idxVar);
8091#else
8092 /* Only need to free the host register, the rest is done as bulk updates below. */
8093 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8094 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8095 {
8096 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8097 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8098 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8099 }
8100#endif
8101 }
8102#if 0 /** @todo optimize by simplifying this later... */
8103 pReNative->Core.bmVars = 0;
8104 pReNative->Core.bmStack = 0;
8105 pReNative->Core.u64ArgVars = UINT64_MAX;
8106#endif
8107}
8108
8109
8110
8111/*********************************************************************************************************************************
8112* Emitters for IEM_MC_CALL_CIMPL_XXX *
8113*********************************************************************************************************************************/
8114
8115/**
8116 * Emits code to load a reference to the given guest register into @a idxGprDst.
8117 */
8118DECL_HIDDEN_THROW(uint32_t)
8119iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8120 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8121{
8122#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8123    /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8124#endif
8125
8126 /*
8127 * Get the offset relative to the CPUMCTX structure.
8128 */
8129 uint32_t offCpumCtx;
8130 switch (enmClass)
8131 {
8132 case kIemNativeGstRegRef_Gpr:
8133 Assert(idxRegInClass < 16);
8134 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8135 break;
8136
8137        case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
8138 Assert(idxRegInClass < 4);
8139 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8140 break;
8141
8142 case kIemNativeGstRegRef_EFlags:
8143 Assert(idxRegInClass == 0);
8144 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8145 break;
8146
8147 case kIemNativeGstRegRef_MxCsr:
8148 Assert(idxRegInClass == 0);
8149 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8150 break;
8151
8152 case kIemNativeGstRegRef_FpuReg:
8153 Assert(idxRegInClass < 8);
8154 AssertFailed(); /** @todo what kind of indexing? */
8155 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8156 break;
8157
8158 case kIemNativeGstRegRef_MReg:
8159 Assert(idxRegInClass < 8);
8160 AssertFailed(); /** @todo what kind of indexing? */
8161 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8162 break;
8163
8164 case kIemNativeGstRegRef_XReg:
8165 Assert(idxRegInClass < 16);
8166 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8167 break;
8168
8169 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8170 Assert(idxRegInClass == 0);
8171 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8172 break;
8173
8174 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8175 Assert(idxRegInClass == 0);
8176 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8177 break;
8178
8179 default:
8180 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8181 }
8182
8183 /*
8184 * Load the value into the destination register.
8185 */
8186#ifdef RT_ARCH_AMD64
8187 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8188
8189#elif defined(RT_ARCH_ARM64)
8190 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8191 Assert(offCpumCtx < 4096);
8192 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8193
8194#else
8195# error "Port me!"
8196#endif
8197
8198 return off;
8199}
8200
8201
8202/**
8203 * Common code for CIMPL and AIMPL calls.
8204 *
8205 * These are calls that use argument variables and such.  They should not be
8206 * confused with internal calls required to implement an MC operation,
8207 * like a TLB load and similar.
8208 *
8209 * Upon return all that is left to do is to load any hidden arguments and
8210 * perform the call. All argument variables are freed.
8211 *
8212 * @returns New code buffer offset; throws VBox status code on error.
8213 * @param pReNative The native recompile state.
8214 * @param off The code buffer offset.
8215 * @param   cArgs           The total number of arguments (including the
8216 *                          hidden count).
8217 * @param cHiddenArgs The number of hidden arguments. The hidden
8218 * arguments must not have any variable declared for
8219 * them, whereas all the regular arguments must
8220 * (tstIEMCheckMc ensures this).
8221 */
8222DECL_HIDDEN_THROW(uint32_t)
8223iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8224{
8225#ifdef VBOX_STRICT
8226 /*
8227 * Assert sanity.
8228 */
8229 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8230 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8231 for (unsigned i = 0; i < cHiddenArgs; i++)
8232 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8233 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8234 {
8235 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8236 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8237 }
8238 iemNativeRegAssertSanity(pReNative);
8239#endif
8240
8241 /* We don't know what the called function makes use of, so flush any pending register writes. */
8242 off = iemNativeRegFlushPendingWrites(pReNative, off);
8243
8244 /*
8245 * Before we do anything else, go over variables that are referenced and
8246 * make sure they are not in a register.
8247 */
8248 uint32_t bmVars = pReNative->Core.bmVars;
8249 if (bmVars)
8250 {
8251 do
8252 {
8253 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8254 bmVars &= ~RT_BIT_32(idxVar);
8255
8256 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8257 {
8258 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8259#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8260 if ( idxRegOld != UINT8_MAX
8261 && pReNative->Core.aVars[idxVar].fSimdReg)
8262 {
8263 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8264 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8265
8266 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8267 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8268 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8269 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8270 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8271 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8272 else
8273 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8274
8275 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8276 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8277
8278 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8279 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8280 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8281 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8282 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8283 }
8284 else
8285#endif
8286 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8287 {
8288 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8289 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8290 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8291 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8292 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8293
8294 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8295 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8296 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8297 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8298 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8299 }
8300 }
8301 } while (bmVars != 0);
8302#if 0 //def VBOX_STRICT
8303 iemNativeRegAssertSanity(pReNative);
8304#endif
8305 }
8306
8307 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8308
8309 /*
8310 * First, go over the host registers that will be used for arguments and make
8311 * sure they either hold the desired argument or are free.
8312 */
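        /* Three situations are handled below: the register already holds the right
           argument (nothing to do); on hosts where the return register doubles as an
           argument register, a variable destined for another argument slot is moved
           to its final register; otherwise the occupying variable is spilled/moved
           or simply dropped so it can be reloaded later. */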
8313 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8314 {
8315 for (uint32_t i = 0; i < cRegArgs; i++)
8316 {
8317 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8318 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8319 {
8320 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8321 {
8322 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8323 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8324 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8325 Assert(pVar->idxReg == idxArgReg);
8326 uint8_t const uArgNo = pVar->uArgNo;
8327 if (uArgNo == i)
8328                         { /* perfect */ }
8329 /* The variable allocator logic should make sure this is impossible,
8330 except for when the return register is used as a parameter (ARM,
8331 but not x86). */
8332#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8333 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8334 {
8335# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8336# error "Implement this"
8337# endif
8338 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8339 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8340 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8342 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8343 }
8344#endif
8345 else
8346 {
8347 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8348
8349 if (pVar->enmKind == kIemNativeVarKind_Stack)
8350 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8351 else
8352 {
8353 /* just free it, can be reloaded if used again */
8354 pVar->idxReg = UINT8_MAX;
8355 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8356 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8357 }
8358 }
8359 }
8360 else
8361 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8362 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8363 }
8364 }
8365#if 0 //def VBOX_STRICT
8366 iemNativeRegAssertSanity(pReNative);
8367#endif
8368 }
8369
8370 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8371
8372#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8373 /*
8374 * If there are any stack arguments, make sure they are in their place as well.
8375 *
8376     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8377     * the caller) will be loading it later and it must be free (see the first loop).
8378 */
8379 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8380 {
8381 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8382 {
8383 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8384 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8385 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8386 {
8387 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8388 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8389 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8390 pVar->idxReg = UINT8_MAX;
8391 }
8392 else
8393 {
8394 /* Use ARG0 as temp for stuff we need registers for. */
8395 switch (pVar->enmKind)
8396 {
8397 case kIemNativeVarKind_Stack:
8398 {
8399 uint8_t const idxStackSlot = pVar->idxStackSlot;
8400 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8401 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8402 iemNativeStackCalcBpDisp(idxStackSlot));
8403 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8404 continue;
8405 }
8406
8407 case kIemNativeVarKind_Immediate:
8408 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8409 continue;
8410
8411 case kIemNativeVarKind_VarRef:
8412 {
8413 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8414 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8415 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8416 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8417 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8418# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8419 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8420 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8421 if ( fSimdReg
8422 && idxRegOther != UINT8_MAX)
8423 {
8424 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8425 if (cbVar == sizeof(RTUINT128U))
8426 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8427 else
8428 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8429 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8430 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8431 }
8432 else
8433# endif
8434 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8435 {
8436 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8437 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8438 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8439 }
8440 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8441 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8442 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8443 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8444 continue;
8445 }
8446
8447 case kIemNativeVarKind_GstRegRef:
8448 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8449 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8450 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8451 continue;
8452
8453 case kIemNativeVarKind_Invalid:
8454 case kIemNativeVarKind_End:
8455 break;
8456 }
8457 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8458 }
8459 }
8460# if 0 //def VBOX_STRICT
8461 iemNativeRegAssertSanity(pReNative);
8462# endif
8463 }
8464#else
8465 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8466#endif
8467
8468 /*
8469 * Make sure the argument variables are loaded into their respective registers.
8470 *
8471 * We can optimize this by ASSUMING that any register allocations are for
8472     * registers that have already been loaded and are ready.  The previous step
8473 * saw to that.
8474 */
8475 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8476 {
8477 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8478 {
8479 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8480 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8481 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8482 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8483 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8484 else
8485 {
8486 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8487 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8488 {
8489 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8490 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8491 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8492 | RT_BIT_32(idxArgReg);
8493 pVar->idxReg = idxArgReg;
8494 }
8495 else
8496 {
8497 /* Use ARG0 as temp for stuff we need registers for. */
8498 switch (pVar->enmKind)
8499 {
8500 case kIemNativeVarKind_Stack:
8501 {
8502 uint8_t const idxStackSlot = pVar->idxStackSlot;
8503 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8504 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8505 continue;
8506 }
8507
8508 case kIemNativeVarKind_Immediate:
8509 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8510 continue;
8511
8512 case kIemNativeVarKind_VarRef:
8513 {
8514 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8515 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8516 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8517 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8518 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8519 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8520#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8521 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8522 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8523 if ( fSimdReg
8524 && idxRegOther != UINT8_MAX)
8525 {
8526 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8527 if (cbVar == sizeof(RTUINT128U))
8528 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8529 else
8530 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8531 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8532 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8533 }
8534 else
8535#endif
8536 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8537 {
8538 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8539 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8540 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8541 }
8542 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8543 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8544 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8545 continue;
8546 }
8547
8548 case kIemNativeVarKind_GstRegRef:
8549 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8550 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8551 continue;
8552
8553 case kIemNativeVarKind_Invalid:
8554 case kIemNativeVarKind_End:
8555 break;
8556 }
8557 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8558 }
8559 }
8560 }
8561#if 0 //def VBOX_STRICT
8562 iemNativeRegAssertSanity(pReNative);
8563#endif
8564 }
8565#ifdef VBOX_STRICT
8566 else
8567 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8568 {
8569 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8570 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8571 }
8572#endif
8573
8574 /*
8575 * Free all argument variables (simplified).
8576 * Their lifetime always expires with the call they are for.
8577 */
8578 /** @todo Make the python script check that arguments aren't used after
8579 * IEM_MC_CALL_XXXX. */
8580    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8581     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8582     *        an argument value.  There is also some FPU stuff. */
8583 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8584 {
8585 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8586 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8587
8588 /* no need to free registers: */
8589 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8590 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8591 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8592 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8593 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8594 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8595
8596 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8597 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8598 iemNativeVarFreeStackSlots(pReNative, idxVar);
8599 }
8600 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8601
8602 /*
8603 * Flush volatile registers as we make the call.
8604 */
8605 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8606
8607 return off;
8608}
8609
8610
8611
8612/*********************************************************************************************************************************
8613* TLB Lookup. *
8614*********************************************************************************************************************************/
8615
8616/**
8617 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8618 */
8619DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8620{
8621 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8622 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8623 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8624 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8625
8626 /* Do the lookup manually. */
8627 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8628 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8629 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8630 if (RT_LIKELY(pTlbe->uTag == uTag))
8631 {
8632 /*
8633 * Check TLB page table level access flags.
8634 */
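                /* The checks are folded into a single compare: fFlagsAndPhysRev picks up
                   the physical revision plus every flag bit that would disqualify this
                   access, so it only equals uTlbPhysRev when the revision matches and no
                   disqualifying bit is set. */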
8635 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8636 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8637 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8638 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8639 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8640 | IEMTLBE_F_PG_UNASSIGNED
8641 | IEMTLBE_F_PT_NO_ACCESSED
8642 | fNoWriteNoDirty | fNoUser);
8643 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8644 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8645 {
8646 /*
8647 * Return the address.
8648 */
8649 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8650 if ((uintptr_t)pbAddr == uResult)
8651 return;
8652 RT_NOREF(cbMem);
8653 AssertFailed();
8654 }
8655 else
8656 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8657 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8658 }
8659 else
8660 AssertFailed();
8661 RT_BREAKPOINT();
8662}
8663
8664/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8665
8666
8667
8668/*********************************************************************************************************************************
8669* Recompiler Core. *
8670*********************************************************************************************************************************/
8671
8672/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8673static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8674{
8675 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8676 pDis->cbCachedInstr += cbMaxRead;
8677 RT_NOREF(cbMinRead);
8678 return VERR_NO_DATA;
8679}
8680
8681
8682DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8683{
8684 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8685 {
8686#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8687 ENTRY(fLocalForcedActions),
8688 ENTRY(iem.s.rcPassUp),
8689 ENTRY(iem.s.fExec),
8690 ENTRY(iem.s.pbInstrBuf),
8691 ENTRY(iem.s.uInstrBufPc),
8692 ENTRY(iem.s.GCPhysInstrBuf),
8693 ENTRY(iem.s.cbInstrBufTotal),
8694 ENTRY(iem.s.idxTbCurInstr),
8695#ifdef VBOX_WITH_STATISTICS
8696 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8697 ENTRY(iem.s.StatNativeTlbHitsForStore),
8698 ENTRY(iem.s.StatNativeTlbHitsForStack),
8699 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8700 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8701 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8702 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8703 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8704#endif
8705 ENTRY(iem.s.DataTlb.aEntries),
8706 ENTRY(iem.s.DataTlb.uTlbRevision),
8707 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8708 ENTRY(iem.s.DataTlb.cTlbHits),
8709 ENTRY(iem.s.CodeTlb.aEntries),
8710 ENTRY(iem.s.CodeTlb.uTlbRevision),
8711 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8712 ENTRY(iem.s.CodeTlb.cTlbHits),
8713 ENTRY(pVMR3),
8714 ENTRY(cpum.GstCtx.rax),
8715 ENTRY(cpum.GstCtx.ah),
8716 ENTRY(cpum.GstCtx.rcx),
8717 ENTRY(cpum.GstCtx.ch),
8718 ENTRY(cpum.GstCtx.rdx),
8719 ENTRY(cpum.GstCtx.dh),
8720 ENTRY(cpum.GstCtx.rbx),
8721 ENTRY(cpum.GstCtx.bh),
8722 ENTRY(cpum.GstCtx.rsp),
8723 ENTRY(cpum.GstCtx.rbp),
8724 ENTRY(cpum.GstCtx.rsi),
8725 ENTRY(cpum.GstCtx.rdi),
8726 ENTRY(cpum.GstCtx.r8),
8727 ENTRY(cpum.GstCtx.r9),
8728 ENTRY(cpum.GstCtx.r10),
8729 ENTRY(cpum.GstCtx.r11),
8730 ENTRY(cpum.GstCtx.r12),
8731 ENTRY(cpum.GstCtx.r13),
8732 ENTRY(cpum.GstCtx.r14),
8733 ENTRY(cpum.GstCtx.r15),
8734 ENTRY(cpum.GstCtx.es.Sel),
8735 ENTRY(cpum.GstCtx.es.u64Base),
8736 ENTRY(cpum.GstCtx.es.u32Limit),
8737 ENTRY(cpum.GstCtx.es.Attr),
8738 ENTRY(cpum.GstCtx.cs.Sel),
8739 ENTRY(cpum.GstCtx.cs.u64Base),
8740 ENTRY(cpum.GstCtx.cs.u32Limit),
8741 ENTRY(cpum.GstCtx.cs.Attr),
8742 ENTRY(cpum.GstCtx.ss.Sel),
8743 ENTRY(cpum.GstCtx.ss.u64Base),
8744 ENTRY(cpum.GstCtx.ss.u32Limit),
8745 ENTRY(cpum.GstCtx.ss.Attr),
8746 ENTRY(cpum.GstCtx.ds.Sel),
8747 ENTRY(cpum.GstCtx.ds.u64Base),
8748 ENTRY(cpum.GstCtx.ds.u32Limit),
8749 ENTRY(cpum.GstCtx.ds.Attr),
8750 ENTRY(cpum.GstCtx.fs.Sel),
8751 ENTRY(cpum.GstCtx.fs.u64Base),
8752 ENTRY(cpum.GstCtx.fs.u32Limit),
8753 ENTRY(cpum.GstCtx.fs.Attr),
8754 ENTRY(cpum.GstCtx.gs.Sel),
8755 ENTRY(cpum.GstCtx.gs.u64Base),
8756 ENTRY(cpum.GstCtx.gs.u32Limit),
8757 ENTRY(cpum.GstCtx.gs.Attr),
8758 ENTRY(cpum.GstCtx.rip),
8759 ENTRY(cpum.GstCtx.eflags),
8760 ENTRY(cpum.GstCtx.uRipInhibitInt),
8761 ENTRY(cpum.GstCtx.cr0),
8762 ENTRY(cpum.GstCtx.cr4),
8763 ENTRY(cpum.GstCtx.aXcr[0]),
8764 ENTRY(cpum.GstCtx.aXcr[1]),
8765#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8766 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8767 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8768 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8769 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8770 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8771 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8772 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8773 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8774 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8775 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8776 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8777 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8778 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8779 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8780 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8781 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8782 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8783 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8784 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8785 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8786 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8787 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8788 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8789 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8790 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8791 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8792 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8793 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8794 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8795 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8796 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8797 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8798#endif
8799#undef ENTRY
8800 };
8801#ifdef VBOX_STRICT
8802 static bool s_fOrderChecked = false;
8803 if (!s_fOrderChecked)
8804 {
8805 s_fOrderChecked = true;
8806 uint32_t offPrev = s_aMembers[0].off;
8807 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8808 {
8809 Assert(s_aMembers[i].off > offPrev);
8810 offPrev = s_aMembers[i].off;
8811 }
8812 }
8813#endif
8814
8815 /*
8816 * Binary lookup.
8817 */
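        /* Note: s_aMembers must be sorted by ascending offset (verified once in strict
           builds above); an exact match returns the member name, otherwise we fall
           through to the acThreadedFuncStats range check / NULL return below. */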
8818 unsigned iStart = 0;
8819 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8820 for (;;)
8821 {
8822 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8823 uint32_t const offCur = s_aMembers[iCur].off;
8824 if (off < offCur)
8825 {
8826 if (iCur != iStart)
8827 iEnd = iCur;
8828 else
8829 break;
8830 }
8831 else if (off > offCur)
8832 {
8833 if (iCur + 1 < iEnd)
8834 iStart = iCur + 1;
8835 else
8836 break;
8837 }
8838 else
8839 return s_aMembers[iCur].pszName;
8840 }
8841#ifdef VBOX_WITH_STATISTICS
8842 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8843 return "iem.s.acThreadedFuncStats[iFn]";
8844#endif
8845 return NULL;
8846}
8847
8848
8849DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8850{
8851 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8852#if defined(RT_ARCH_AMD64)
8853 static const char * const a_apszMarkers[] =
8854 {
8855 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8856 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8857 };
8858#endif
8859
8860 char szDisBuf[512];
8861 DISSTATE Dis;
8862 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8863 uint32_t const cNative = pTb->Native.cInstructions;
8864 uint32_t offNative = 0;
8865#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8866 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8867#endif
8868 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8869 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8870 : DISCPUMODE_64BIT;
8871#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8872 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8873#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8874 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8875#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8876# error "Port me"
8877#else
8878 csh hDisasm = ~(size_t)0;
8879# if defined(RT_ARCH_AMD64)
8880 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8881# elif defined(RT_ARCH_ARM64)
8882 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8883# else
8884# error "Port me"
8885# endif
8886 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8887
8888 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8889 //Assert(rcCs == CS_ERR_OK);
8890#endif
8891
8892 /*
8893 * Print TB info.
8894 */
8895 pHlp->pfnPrintf(pHlp,
8896 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8897 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8898 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8899 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8900#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8901 if (pDbgInfo && pDbgInfo->cEntries > 1)
8902 {
8903 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8904
8905 /*
8906 * This disassembly is driven by the debug info which follows the native
8907 * code and indicates when it starts with the next guest instructions,
8908 * where labels are and such things.
8909 */
8910 uint32_t idxThreadedCall = 0;
8911 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8912 uint8_t idxRange = UINT8_MAX;
8913 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8914 uint32_t offRange = 0;
8915 uint32_t offOpcodes = 0;
8916 uint32_t const cbOpcodes = pTb->cbOpcodes;
8917 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8918 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8919 uint32_t iDbgEntry = 1;
8920 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8921
8922 while (offNative < cNative)
8923 {
8924 /* If we're at or have passed the point where the next chunk of debug
8925 info starts, process it. */
8926 if (offDbgNativeNext <= offNative)
8927 {
8928 offDbgNativeNext = UINT32_MAX;
8929 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8930 {
8931 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8932 {
8933 case kIemTbDbgEntryType_GuestInstruction:
8934 {
8935 /* Did the exec flag change? */
8936 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8937 {
8938 pHlp->pfnPrintf(pHlp,
8939 " fExec change %#08x -> %#08x %s\n",
8940 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8941 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8942 szDisBuf, sizeof(szDisBuf)));
8943 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8944 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8945 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8946 : DISCPUMODE_64BIT;
8947 }
8948
8949                             /* New opcode range? We need to handle a spurious debug info entry here for cases
8950 where the compilation was aborted before the opcode was recorded and the actual
8951 instruction was translated to a threaded call. This may happen when we run out
8952 of ranges, or when some complicated interrupts/FFs are found to be pending or
8953 similar. So, we just deal with it here rather than in the compiler code as it
8954 is a lot simpler to do here. */
8955 if ( idxRange == UINT8_MAX
8956 || idxRange >= cRanges
8957 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8958 {
8959 idxRange += 1;
8960 if (idxRange < cRanges)
8961 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8962 else
8963 continue;
8964 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8965 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8966 + (pTb->aRanges[idxRange].idxPhysPage == 0
8967 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8968 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8969 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8970 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8971 pTb->aRanges[idxRange].idxPhysPage);
8972 GCPhysPc += offRange;
8973 }
8974
8975 /* Disassemble the instruction. */
8976 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8977 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8978 uint32_t cbInstr = 1;
8979 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8980 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8981 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8982 if (RT_SUCCESS(rc))
8983 {
8984 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8985 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8986 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8987 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8988
8989 static unsigned const s_offMarker = 55;
8990 static char const s_szMarker[] = " ; <--- guest";
8991 if (cch < s_offMarker)
8992 {
8993 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8994 cch = s_offMarker;
8995 }
8996 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8997 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8998
8999 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9000 }
9001 else
9002 {
9003 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9004 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9005 cbInstr = 1;
9006 }
9007 GCPhysPc += cbInstr;
9008 offOpcodes += cbInstr;
9009 offRange += cbInstr;
9010 continue;
9011 }
9012
9013 case kIemTbDbgEntryType_ThreadedCall:
9014 pHlp->pfnPrintf(pHlp,
9015 " Call #%u to %s (%u args) - %s\n",
9016 idxThreadedCall,
9017 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9018 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9019 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9020 idxThreadedCall++;
9021 continue;
9022
9023 case kIemTbDbgEntryType_GuestRegShadowing:
9024 {
9025 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9026 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9027 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9028 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9029 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9030 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9031 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
9032 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9033 else
9034 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9035 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9036 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9037 continue;
9038 }
9039
9040#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9041 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9042 {
9043 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9044 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9045 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9046 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9047 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9048 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9049 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9050 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9051 else
9052 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9053 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9054 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9055 continue;
9056 }
9057#endif
9058
9059 case kIemTbDbgEntryType_Label:
9060 {
9061 const char *pszName = "what_the_fudge";
9062 const char *pszComment = "";
9063 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9064 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9065 {
9066 case kIemNativeLabelType_Return: pszName = "Return"; break;
9067 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9068 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9069 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9070 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9071 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9072 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9073 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9074 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9075 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9076 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9077 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9078 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9079 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9080 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9081 case kIemNativeLabelType_If:
9082 pszName = "If";
9083 fNumbered = true;
9084 break;
9085 case kIemNativeLabelType_Else:
9086 pszName = "Else";
9087 fNumbered = true;
9088 pszComment = " ; regs state restored pre-if-block";
9089 break;
9090 case kIemNativeLabelType_Endif:
9091 pszName = "Endif";
9092 fNumbered = true;
9093 break;
9094 case kIemNativeLabelType_CheckIrq:
9095 pszName = "CheckIrq_CheckVM";
9096 fNumbered = true;
9097 break;
9098 case kIemNativeLabelType_TlbLookup:
9099 pszName = "TlbLookup";
9100 fNumbered = true;
9101 break;
9102 case kIemNativeLabelType_TlbMiss:
9103 pszName = "TlbMiss";
9104 fNumbered = true;
9105 break;
9106 case kIemNativeLabelType_TlbDone:
9107 pszName = "TlbDone";
9108 fNumbered = true;
9109 break;
9110 case kIemNativeLabelType_Invalid:
9111 case kIemNativeLabelType_End:
9112 break;
9113 }
9114 if (fNumbered)
9115 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9116 else
9117 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9118 continue;
9119 }
9120
9121 case kIemTbDbgEntryType_NativeOffset:
9122 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9123 Assert(offDbgNativeNext > offNative);
9124 break;
9125
9126#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9127 case kIemTbDbgEntryType_DelayedPcUpdate:
9128 pHlp->pfnPrintf(pHlp,
9129 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9130 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9131 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9132 continue;
9133#endif
9134
9135 default:
9136 AssertFailed();
9137 }
9138 iDbgEntry++;
9139 break;
9140 }
9141 }
9142
9143 /*
9144 * Disassemble the next native instruction.
9145 */
9146 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9147# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9148 uint32_t cbInstr = sizeof(paNative[0]);
9149 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9150 if (RT_SUCCESS(rc))
9151 {
9152# if defined(RT_ARCH_AMD64)
9153 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9154 {
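                        /* Markers are 7 byte NOPs carrying a 32-bit immediate emitted by iemNativeEmitMarker():
                           for per-call markers the low word holds the call index (bit 15 = recompiled flag) and
                           the high word the threaded function number; other values index a_apszMarkers. */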
9155 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9156 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9157 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9158 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9159 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9160 uInfo & 0x8000 ? "recompiled" : "todo");
9161 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9162 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9163 else
9164 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9165 }
9166 else
9167# endif
9168 {
9169 const char *pszAnnotation = NULL;
9170# ifdef RT_ARCH_AMD64
9171 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9172 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9173 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9174 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9175 PCDISOPPARAM pMemOp;
9176 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9177 pMemOp = &Dis.Param1;
9178 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9179 pMemOp = &Dis.Param2;
9180 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9181 pMemOp = &Dis.Param3;
9182 else
9183 pMemOp = NULL;
9184 if ( pMemOp
9185 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9186 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9187 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9188 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9189
9190#elif defined(RT_ARCH_ARM64)
9191 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9192 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9193 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9194# else
9195# error "Port me"
9196# endif
9197 if (pszAnnotation)
9198 {
9199 static unsigned const s_offAnnotation = 55;
9200 size_t const cchAnnotation = strlen(pszAnnotation);
9201 size_t cchDis = strlen(szDisBuf);
9202 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9203 {
9204 if (cchDis < s_offAnnotation)
9205 {
9206 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9207 cchDis = s_offAnnotation;
9208 }
9209 szDisBuf[cchDis++] = ' ';
9210 szDisBuf[cchDis++] = ';';
9211 szDisBuf[cchDis++] = ' ';
9212 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9213 }
9214 }
9215 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9216 }
9217 }
9218 else
9219 {
9220# if defined(RT_ARCH_AMD64)
9221 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9222 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9223# elif defined(RT_ARCH_ARM64)
9224 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9225# else
9226# error "Port me"
9227# endif
9228 cbInstr = sizeof(paNative[0]);
9229 }
9230 offNative += cbInstr / sizeof(paNative[0]);
9231
9232# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9233 cs_insn *pInstr;
9234 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9235 (uintptr_t)pNativeCur, 1, &pInstr);
9236 if (cInstrs > 0)
9237 {
9238 Assert(cInstrs == 1);
9239 const char *pszAnnotation = NULL;
9240# if defined(RT_ARCH_ARM64)
9241 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9242 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9243 {
9244                 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9245 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9246 char *psz = strchr(pInstr->op_str, '[');
9247 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9248 {
9249                     uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9250 int32_t off = -1;
9251 psz += 4;
9252 if (*psz == ']')
9253 off = 0;
9254 else if (*psz == ',')
9255 {
9256 psz = RTStrStripL(psz + 1);
9257 if (*psz == '#')
9258 off = RTStrToInt32(&psz[1]);
9259 /** @todo deal with index registers and LSL as well... */
9260 }
9261 if (off >= 0)
9262 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9263 }
9264 }
9265# endif
9266
9267 size_t const cchOp = strlen(pInstr->op_str);
9268# if defined(RT_ARCH_AMD64)
9269 if (pszAnnotation)
9270 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9271 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9272 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9273 else
9274 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9275 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9276
9277# else
9278 if (pszAnnotation)
9279 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9280 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9281 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9282 else
9283 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9284 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9285# endif
9286 offNative += pInstr->size / sizeof(*pNativeCur);
9287 cs_free(pInstr, cInstrs);
9288 }
9289 else
9290 {
9291# if defined(RT_ARCH_AMD64)
9292 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9293                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9294# else
9295 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9296# endif
9297 offNative++;
9298 }
9299# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9300 }
9301 }
9302 else
9303#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9304 {
9305 /*
9306 * No debug info, just disassemble the x86 code and then the native code.
9307 *
9308 * First the guest code:
9309 */
9310 for (unsigned i = 0; i < pTb->cRanges; i++)
9311 {
9312 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9313 + (pTb->aRanges[i].idxPhysPage == 0
9314 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9315 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9316 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9317 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9318 unsigned off = pTb->aRanges[i].offOpcodes;
9319 /** @todo this ain't working when crossing pages! */
9320 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9321 while (off < cbOpcodes)
9322 {
9323 uint32_t cbInstr = 1;
9324 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9325 &pTb->pabOpcodes[off], cbOpcodes - off,
9326 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9327 if (RT_SUCCESS(rc))
9328 {
9329 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9330 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9331 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9332 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9333 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9334 GCPhysPc += cbInstr;
9335 off += cbInstr;
9336 }
9337 else
9338 {
9339 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9340 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9341 break;
9342 }
9343 }
9344 }
9345
9346 /*
9347 * Then the native code:
9348 */
9349 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9350 while (offNative < cNative)
9351 {
9352 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9353# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9354 uint32_t cbInstr = sizeof(paNative[0]);
9355 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9356 if (RT_SUCCESS(rc))
9357 {
9358# if defined(RT_ARCH_AMD64)
9359 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9360 {
9361 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9362 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9363 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9364 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9365 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9366 uInfo & 0x8000 ? "recompiled" : "todo");
9367 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9368 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9369 else
9370 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9371 }
9372 else
9373# endif
9374 {
9375# ifdef RT_ARCH_AMD64
9376 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9377 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9378 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9379 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9380# elif defined(RT_ARCH_ARM64)
9381 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9382 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9383 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9384# else
9385# error "Port me"
9386# endif
9387 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9388 }
9389 }
9390 else
9391 {
9392# if defined(RT_ARCH_AMD64)
9393 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9394 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9395# else
9396 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9397# endif
9398 cbInstr = sizeof(paNative[0]);
9399 }
9400 offNative += cbInstr / sizeof(paNative[0]);
9401
9402# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9403 cs_insn *pInstr;
9404 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9405 (uintptr_t)pNativeCur, 1, &pInstr);
9406 if (cInstrs > 0)
9407 {
9408 Assert(cInstrs == 1);
9409# if defined(RT_ARCH_AMD64)
9410 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9411 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9412# else
9413 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9414 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9415# endif
9416 offNative += pInstr->size / sizeof(*pNativeCur);
9417 cs_free(pInstr, cInstrs);
9418 }
9419 else
9420 {
9421# if defined(RT_ARCH_AMD64)
9422 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9423                                  pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9424# else
9425 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9426# endif
9427 offNative++;
9428 }
9429# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9430 }
9431 }
9432
9433#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9434 /* Cleanup. */
9435 cs_close(&hDisasm);
9436#endif
9437}
9438
9439
9440/**
9441 * Recompiles the given threaded TB into a native one.
9442 *
9443 * In case of failure the translation block will be returned as-is.
9444 *
9445 * @returns pTb.
9446 * @param pVCpu The cross context virtual CPU structure of the calling
9447 * thread.
9448  * @param pTb The threaded translation block to recompile to native.
9449 */
9450DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9451{
9452 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9453
9454 /*
9455     * The first time thru, we allocate the recompiler state; the other times
9456     * we just need to reset it before using it again.
9457 */
9458 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9459 if (RT_LIKELY(pReNative))
9460 iemNativeReInit(pReNative, pTb);
9461 else
9462 {
9463 pReNative = iemNativeInit(pVCpu, pTb);
9464 AssertReturn(pReNative, pTb);
9465 }
9466
9467#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9468 /*
9469 * First do liveness analysis. This is done backwards.
9470 */
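            /* Each liveness function derives the entry for the preceding call (idxCall - 1)
               from the entry belonging to the call itself (idxCall); the final entry is
               seeded as 'all unused' before walking backwards. */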
9471 {
9472 uint32_t idxCall = pTb->Thrd.cCalls;
9473 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9474 { /* likely */ }
9475 else
9476 {
9477 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9478 while (idxCall > cAlloc)
9479 cAlloc *= 2;
9480 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9481 AssertReturn(pvNew, pTb);
9482 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9483 pReNative->cLivenessEntriesAlloc = cAlloc;
9484 }
9485 AssertReturn(idxCall > 0, pTb);
9486 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9487
9488 /* The initial (final) entry. */
9489 idxCall--;
9490 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9491
9492 /* Loop backwards thru the calls and fill in the other entries. */
9493 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9494 while (idxCall > 0)
9495 {
9496 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9497 if (pfnLiveness)
9498 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9499 else
9500 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9501 pCallEntry--;
9502 idxCall--;
9503 }
9504
9505# ifdef VBOX_WITH_STATISTICS
9506    /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9507       to 'clobbered' rather than 'input'. */
9508 /** @todo */
9509# endif
9510 }
9511#endif
9512
9513 /*
9514 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9515 * for aborting if an error happens.
9516 */
9517 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9518#ifdef LOG_ENABLED
9519 uint32_t const cCallsOrg = cCallsLeft;
9520#endif
9521 uint32_t off = 0;
9522 int rc = VINF_SUCCESS;
9523 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9524 {
9525 /*
9526 * Emit prolog code (fixed).
9527 */
9528 off = iemNativeEmitProlog(pReNative, off);
9529
9530 /*
9531 * Convert the calls to native code.
9532 */
9533#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9534 int32_t iGstInstr = -1;
9535#endif
9536#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9537 uint32_t cThreadedCalls = 0;
9538 uint32_t cRecompiledCalls = 0;
9539#endif
9540#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9541 uint32_t idxCurCall = 0;
9542#endif
9543 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9544 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9545 while (cCallsLeft-- > 0)
9546 {
9547 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9548#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9549 pReNative->idxCurCall = idxCurCall;
9550#endif
9551
9552 /*
9553 * Debug info, assembly markup and statistics.
9554 */
9555#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9556 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9557 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9558#endif
9559#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9560 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9561 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9562 {
9563 if (iGstInstr < (int32_t)pTb->cInstructions)
9564 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9565 else
9566 Assert(iGstInstr == pTb->cInstructions);
9567 iGstInstr = pCallEntry->idxInstr;
9568 }
9569 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9570#endif
9571#if defined(VBOX_STRICT)
9572 off = iemNativeEmitMarker(pReNative, off,
9573 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9574#endif
9575#if defined(VBOX_STRICT)
9576 iemNativeRegAssertSanity(pReNative);
9577#endif
9578#ifdef VBOX_WITH_STATISTICS
9579 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9580#endif
9581
9582 /*
9583 * Actual work.
9584 */
9585 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9586 pfnRecom ? "(recompiled)" : "(todo)"));
9587 if (pfnRecom) /** @todo stats on this. */
9588 {
9589 off = pfnRecom(pReNative, off, pCallEntry);
9590 STAM_REL_STATS({cRecompiledCalls++;});
9591 }
9592 else
9593 {
9594 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9595 STAM_REL_STATS({cThreadedCalls++;});
9596 }
9597 Assert(off <= pReNative->cInstrBufAlloc);
9598 Assert(pReNative->cCondDepth == 0);
9599
9600#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9601 if (LogIs2Enabled())
9602 {
9603 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9604# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9605 static const char s_achState[] = "CUXI";
9606# else
9607 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9608# endif
9609
9610 char szGpr[17];
9611 for (unsigned i = 0; i < 16; i++)
9612 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9613 szGpr[16] = '\0';
9614
9615 char szSegBase[X86_SREG_COUNT + 1];
9616 char szSegLimit[X86_SREG_COUNT + 1];
9617 char szSegAttrib[X86_SREG_COUNT + 1];
9618 char szSegSel[X86_SREG_COUNT + 1];
9619 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9620 {
9621 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9622 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9623 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9624 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9625 }
9626 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9627 = szSegSel[X86_SREG_COUNT] = '\0';
9628
9629 char szEFlags[8];
9630 for (unsigned i = 0; i < 7; i++)
9631 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9632 szEFlags[7] = '\0';
9633
9634                 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9635 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9636 }
9637#endif
9638
9639 /*
9640 * Advance.
9641 */
9642 pCallEntry++;
9643#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9644 idxCurCall++;
9645#endif
9646 }
9647
9648 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9649 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9650 if (!cThreadedCalls)
9651 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9652
9653 /*
9654 * Emit the epilog code.
9655 */
9656 uint32_t idxReturnLabel;
9657 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9658
9659 /*
9660 * Generate special jump labels.
9661 */
9662 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9663 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9664 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9665 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9666
9667 /*
9668         * Generate simple TB tail labels that just call a helper with a pVCpu
9669         * arg and either return or longjmp/throw a non-zero status.
9670 *
9671 * The array entries must be ordered by enmLabel value so we can index
9672 * using fTailLabels bit numbers.
9673 */
9674 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9675 static struct
9676 {
9677 IEMNATIVELABELTYPE enmLabel;
9678 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9679 } const g_aSimpleTailLabels[] =
9680 {
9681 { kIemNativeLabelType_Invalid, NULL },
9682 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9683 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9684 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9685 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9686 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9687 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9688 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9689 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9690 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9691 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9692 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9693 };
9694 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9695 AssertCompile(kIemNativeLabelType_Invalid == 0);
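        /* Mask covers bits 1 thru LastSimple; subtracting 2 rather than 1 clears bit 0 (the Invalid label). */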
9696 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9697 if (fTailLabels)
9698 {
9699 do
9700 {
9701 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9702 fTailLabels &= ~RT_BIT_64(enmLabel);
9703 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9704
9705 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9706 Assert(idxLabel != UINT32_MAX);
9707 if (idxLabel != UINT32_MAX)
9708 {
9709 iemNativeLabelDefine(pReNative, idxLabel, off);
9710
9711 /* int pfnCallback(PVMCPUCC pVCpu) */
9712 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9713 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9714
9715 /* jump back to the return sequence. */
9716 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9717 }
9718
9719 } while (fTailLabels);
9720 }
9721 }
9722 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9723 {
9724 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9725 return pTb;
9726 }
9727 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9728 Assert(off <= pReNative->cInstrBufAlloc);
9729
9730 /*
9731     * Make sure all labels have been defined.
9732 */
9733 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9734#ifdef VBOX_STRICT
9735 uint32_t const cLabels = pReNative->cLabels;
9736 for (uint32_t i = 0; i < cLabels; i++)
9737 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9738#endif
9739
9740 /*
9741 * Allocate executable memory, copy over the code we've generated.
9742 */
9743 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9744 if (pTbAllocator->pDelayedFreeHead)
9745 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9746
9747 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9748 AssertReturn(paFinalInstrBuf, pTb);
9749 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9750
9751 /*
9752 * Apply fixups.
9753 */
9754 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9755 uint32_t const cFixups = pReNative->cFixups;
9756 for (uint32_t i = 0; i < cFixups; i++)
9757 {
9758 Assert(paFixups[i].off < off);
9759 Assert(paFixups[i].idxLabel < cLabels);
9760 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9761 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9762 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9763 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9764 switch (paFixups[i].enmType)
9765 {
9766#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
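/* Note: On AMD64/X86 the instruction buffer is byte granular, so label and
   fixup offsets are byte offsets; offAddend is assumed to compensate for the
   rel32 being relative to the end of the instruction. */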
9767 case kIemNativeFixupType_Rel32:
9768 Assert(paFixups[i].off + 4 <= off);
9769 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9770 continue;
9771
9772#elif defined(RT_ARCH_ARM64)
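/* Note: On ARM64 the buffer holds 32-bit instruction words, so the
   displacements below are in instruction units (the CPU scales the encoded
   immediate by 4):
     - RelImm26At0: B/BL            - imm26 in bits [25:0]
     - RelImm19At5: B.cond/CBZ/CBNZ - imm19 in bits [23:5]
     - RelImm14At5: TBZ/TBNZ        - imm14 in bits [18:5] */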
9773 case kIemNativeFixupType_RelImm26At0:
9774 {
9775 Assert(paFixups[i].off < off);
9776 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9777 Assert(offDisp >= -262144 && offDisp < 262144);
9778 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9779 continue;
9780 }
9781
9782 case kIemNativeFixupType_RelImm19At5:
9783 {
9784 Assert(paFixups[i].off < off);
9785 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9786 Assert(offDisp >= -262144 && offDisp < 262144);
9787 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9788 continue;
9789 }
9790
9791 case kIemNativeFixupType_RelImm14At5:
9792 {
9793 Assert(paFixups[i].off < off);
9794 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9795 Assert(offDisp >= -8192 && offDisp < 8192);
9796 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9797 continue;
9798 }
9799
9800#endif
9801 case kIemNativeFixupType_Invalid:
9802 case kIemNativeFixupType_End:
9803 break;
9804 }
9805 AssertFailed();
9806 }
9807
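/* Presumably this makes the freshly written code executable, flushing the
   host instruction cache and/or toggling write protection as needed. */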
9808 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9809 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9810
9811 /*
9812 * Convert the translation block.
9813 */
9814 RTMemFree(pTb->Thrd.paCalls);
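/* Note: Thrd and Native appear to overlap in a union within IEMTB, hence the
   threaded call table is freed before the native fields are written. */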
9815 pTb->Native.paInstructions = paFinalInstrBuf;
9816 pTb->Native.cInstructions = off;
9817 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9818#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9819 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9820 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9821#endif
9822
9823 Assert(pTbAllocator->cThreadedTbs > 0);
9824 pTbAllocator->cThreadedTbs -= 1;
9825 pTbAllocator->cNativeTbs += 1;
9826 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9827
9828#ifdef LOG_ENABLED
9829 /*
9830 * Disassemble to the log if enabled.
9831 */
9832 if (LogIs3Enabled())
9833 {
9834 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9835 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9836# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9837 RTLogFlush(NULL);
9838# endif
9839 }
9840#endif
9841 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9842
9843 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9844 return pTb;
9845}
9846